Skip to content

Commit 6aeb5ff

Browse files
committed
fix: normalize complete inventory paths
1 parent 241a8df commit 6aeb5ff

2 files changed

Lines changed: 54 additions & 21 deletions

File tree

scripts/pg_compat/extract_statements.py

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,13 @@ def _jsonl_text(rows):
258258
return text
259259

260260

261+
def _portable_path_key(path):
262+
return tuple(
263+
unicodedata.normalize("NFC", component).casefold()
264+
for component in path.parts
265+
)
266+
267+
261268
def _validate_distinct_paths(input_path, inventory_path, diagnostics_path):
262269
named_paths = (
263270
("--input", Path(input_path)),
@@ -283,27 +290,9 @@ def _validate_distinct_paths(input_path, inventory_path, diagnostics_path):
283290
aliases = False
284291

285292
if not aliases:
286-
left_parent = left_resolved.parent
287-
right_parent = right_resolved.parent
288-
same_parent = left_parent == right_parent
289-
if not same_parent:
290-
try:
291-
same_parent = os.path.samefile(
292-
left_parent,
293-
right_parent,
294-
)
295-
except OSError:
296-
same_parent = False
297-
298-
left_name = unicodedata.normalize(
299-
"NFC",
300-
left_resolved.name,
301-
).casefold()
302-
right_name = unicodedata.normalize(
303-
"NFC",
304-
right_resolved.name,
305-
).casefold()
306-
aliases = same_parent and left_name == right_name
293+
aliases = _portable_path_key(
294+
left_resolved
295+
) == _portable_path_key(right_resolved)
307296

308297
if aliases:
309298
raise ValueError(

tests/pg_compat/test_extract_statements.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,50 @@ def test_rejects_nonexistent_output_names_with_equivalent_unicode(self):
524524
self.assertFalse(inventory_path.exists())
525525
self.assertFalse(diagnostics_path.exists())
526526

527+
def test_rejects_nonexistent_parent_names_differing_only_by_case(self):
    # The two output paths share a file name and sit in parent directories
    # ("Reports" vs "reports") that do not exist and differ only by case.
    # self.run_cli is defined outside this view.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        source = root / "raw.jsonl"
        source.write_text("", encoding="utf-8")
        capitalized_parent = root / "Reports"
        lowercase_parent = root / "reports"

        completed = self.run_cli(
            "--input",
            source,
            "--inventory",
            capitalized_parent / "output.jsonl",
            "--diagnostics",
            lowercase_parent / "output.jsonl",
        )

        # Expect a non-zero exit and the "distinct files" message on stderr.
        self.assertNotEqual(completed.returncode, 0)
        self.assertIn("distinct files", completed.stderr)
        # Neither parent directory may exist after the run.
        for parent in (capitalized_parent, lowercase_parent):
            self.assertFalse(parent.exists())
548+
549+
def test_rejects_nonexistent_parent_names_with_equivalent_unicode(self):
    # The two output paths share a file name and sit in parent directories
    # that do not exist and whose names spell "café" two ways: precomposed
    # U+00E9 versus "e" followed by combining U+0301.
    # self.run_cli is defined outside this view.
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        source = root / "raw.jsonl"
        source.write_text("", encoding="utf-8")
        composed_parent = root / "caf\u00e9"
        decomposed_parent = root / "cafe\u0301"

        completed = self.run_cli(
            "--input",
            source,
            "--inventory",
            composed_parent / "output.jsonl",
            "--diagnostics",
            decomposed_parent / "output.jsonl",
        )

        # Expect a non-zero exit and the "distinct files" message on stderr.
        self.assertNotEqual(completed.returncode, 0)
        self.assertIn("distinct files", completed.stderr)
        # Neither parent directory may exist after the run.
        for parent in (composed_parent, decomposed_parent):
            self.assertFalse(parent.exists())
570+
527571
@unittest.skipUnless(hasattr(os, "symlink"), "symlinks are unavailable")
528572
def test_rejects_existing_symlink_path_aliases(self):
529573
with tempfile.TemporaryDirectory() as directory:

0 commit comments

Comments
 (0)