Skip to content

Commit ce10f70

Browse files
bokelleyclaude
andcommitted
fix(migrate): don't corrupt mixed numbered+unknown imports under --auto-apply
Per review on #540 — a generated_poc import mixing a known numbered asset (Assets81) with an unknown one (Assets149) was getting half-rewritten: Input : from adcp.types.generated_poc.core.format import Assets81, Assets149 Output: from adcp.types.generated_poc.core.format import VideoFormatAsset, Assets149 leaving VideoFormatAsset imported from a private module path that doesn't export it — guaranteed ImportError in adopter source. Root cause: step 1 (file-wide Assets<N> substitution) ran before step 2 (import-path safety check) had a chance to veto the line. Fix: line-by-line rewrite. For each generated_poc single-line import, compute the post-numbered-substitution symbol set up-front; only run both substitutions when every symbol on the line is in ``_AUTO_APPLY_PUBLIC_SYMBOLS``. Mixed-unsafe lines are left entirely alone. Non-import lines still get free numbered substitution. Scan-time also fixed: numbered references on a mixed-unsafe import line now report ``flag_numbered`` instead of ``auto_applied`` so the report matches the file content. Adds regression test ``test_auto_apply_mixed_numbered_known_unknown_does_not_corrupt_import``. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 67c07d2 commit ce10f70

2 files changed

Lines changed: 101 additions & 50 deletions

File tree

src/adcp/migrate/v3_to_v4.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,7 @@ def _iter_python_files(root: Path) -> list[Path]:
331331
# Regex used in the ``--auto-apply`` import-path fix pass: matches the
332332
# ``from adcp.types.generated_poc...`` prefix so it can be replaced with
333333
# ``from adcp.types``.
334-
_GENERATED_POC_MODULE_RE = re.compile(
335-
r"from\s+adcp\.types\.generated_poc(?:\.[\w.]+)?\s+import"
336-
)
334+
_GENERATED_POC_MODULE_RE = re.compile(r"from\s+adcp\.types\.generated_poc(?:\.[\w.]+)?\s+import")
337335

338336
# Union of symbol names that ``--auto-apply`` can safely reroute to
339337
# ``adcp.types``: the explicit flag_private symbol map plus every public
@@ -378,6 +376,28 @@ def scan_file(
378376
auto_apply_hits = False # any numbered or private-import rewrites queued
379377

380378
for lineno, line in enumerate(original.splitlines(), start=1):
379+
# Pre-pass: when this line is a single-line ``generated_poc``
380+
# import, decide whether the line as a whole is auto-apply-safe.
381+
# An import is *unsafe* when at least one of its symbols (after
382+
# the hypothetical numbered substitution) isn't in
383+
# ``_AUTO_APPLY_PUBLIC_SYMBOLS``; rewriting one symbol while
384+
# leaving another behind would leave the line importing a
385+
# public name from a private module — guaranteed ImportError.
386+
# The rewrite block (`updated.splitlines()` later) skips
387+
# unsafe-mixed lines; the per-symbol Finding emission below
388+
# also treats numbered references on those lines as
389+
# ``flag_numbered`` rather than ``auto_applied`` so the report
390+
# matches the file content.
391+
line_is_mixed_unsafe_import = False
392+
if "adcp.types.generated_poc" in line:
393+
from_match = _GENERATED_POC_FROM_IMPORT.search(line)
394+
if from_match:
395+
raw_syms = [s.strip() for s in from_match.group(1).split(",")]
396+
pre_syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
397+
post_syms = [NUMBERED_ASSETS_RENAMES.get(s, s) for s in pre_syms]
398+
if pre_syms and not all(s in _AUTO_APPLY_PUBLIC_SYMBOLS for s in post_syms):
399+
line_is_mixed_unsafe_import = True
400+
381401
for old, new in ASSET_CONTENT_RENAMES.items():
382402
for match in _RENAME_PATTERNS[old].finditer(line):
383403
findings.append(
@@ -411,7 +431,7 @@ def scan_file(
411431
for match in NUMBERED_ASSETS_PATTERN.finditer(line):
412432
symbol = match.group(0)
413433
alias = NUMBERED_ASSETS_RENAMES.get(symbol)
414-
if auto_apply and alias is not None:
434+
if auto_apply and alias is not None and not line_is_mixed_unsafe_import:
415435
findings.append(
416436
Finding(
417437
kind="auto_applied",
@@ -489,8 +509,7 @@ def scan_file(
489509
continue
490510

491511
all_known = all(
492-
repl is not None
493-
or (auto_apply and symbol in NUMBERED_ASSETS_RENAMES)
512+
repl is not None or (auto_apply and symbol in NUMBERED_ASSETS_RENAMES)
494513
for symbol, repl in parsed
495514
)
496515

@@ -588,38 +607,45 @@ def scan_file(
588607
needs_write = True
589608

590609
if apply_changes and auto_apply and auto_apply_hits:
591-
# Step 1: substitute Assets<N> → SemanticAlias everywhere
592-
# (handles both usage sites and import symbols).
593-
for old, new in NUMBERED_ASSETS_RENAMES.items():
594-
updated = _NUMBERED_RENAME_PATTERNS[old].sub(new, updated)
595-
596-
# Step 2: fix any generated_poc import whose symbols are now all
597-
# resolvable to adcp.types. This covers two cases:
598-
#
599-
# a. ``from generated_poc.core.format import Assets81`` became
600-
# ``from generated_poc.core.format import VideoFormatAsset``
601-
# after step 1 — rewrite the module path.
602-
#
603-
# b. ``from generated_poc.core.x import ContextObject`` whose
604-
# all-known flag_private findings were promoted to auto_applied
605-
# — rewrite the module path here too.
606-
#
607-
# The check uses ``_AUTO_APPLY_PUBLIC_SYMBOLS`` (a frozen set of all
608-
# known-safe names) so we never fix an import that still references
609-
# an unknown symbol.
610+
# Process the file line-by-line so generated_poc imports get a
611+
# safety check against the post-numbered-substitution symbol set
612+
# before any rewrite happens. The earlier "Step 1: substitute
613+
# Assets<N> file-wide; Step 2: fix import paths only when safe"
614+
# ordering corrupted mixed lines like
615+
# ``from generated_poc.core.format import Assets81, Assets149``
616+
# — Assets81 became VideoFormatAsset while Assets149 stayed,
617+
# leaving VideoFormatAsset imported from a private module.
610618
new_lines: list[str] = []
611619
for text_line in updated.splitlines(keepends=True):
612-
if "adcp.types.generated_poc" not in text_line:
620+
is_generated_poc_import = (
621+
"adcp.types.generated_poc" in text_line
622+
and _GENERATED_POC_FROM_IMPORT.search(text_line) is not None
623+
)
624+
if is_generated_poc_import:
625+
m = _GENERATED_POC_FROM_IMPORT.search(text_line)
626+
assert m is not None # narrowed above
627+
raw_syms = [s.strip() for s in m.group(1).split(",")]
628+
pre_syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
629+
# Apply the hypothetical numbered rename to each symbol
630+
# so we can check if the *post-rename* symbol set is
631+
# safe.
632+
post_syms = [NUMBERED_ASSETS_RENAMES.get(s, s) for s in pre_syms]
633+
if post_syms and all(s in _AUTO_APPLY_PUBLIC_SYMBOLS for s in post_syms):
634+
# Whole import is safe — substitute numbered names
635+
# AND fix the module path.
636+
for old, new in NUMBERED_ASSETS_RENAMES.items():
637+
text_line = _NUMBERED_RENAME_PATTERNS[old].sub(new, text_line)
638+
text_line = _GENERATED_POC_MODULE_RE.sub("from adcp.types import", text_line)
639+
# Mixed line — leave it alone. The findings list still
640+
# carries the per-symbol flag_private and flag_numbered
641+
# entries so the adopter sees the work to do.
613642
new_lines.append(text_line)
614643
continue
615-
m = _GENERATED_POC_FROM_IMPORT.search(text_line)
616-
if m:
617-
raw_syms = [s.strip() for s in m.group(1).split(",")]
618-
syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
619-
if syms and all(sym in _AUTO_APPLY_PUBLIC_SYMBOLS for sym in syms):
620-
text_line = _GENERATED_POC_MODULE_RE.sub(
621-
"from adcp.types import", text_line
622-
)
644+
# Non-import lines: substitute numbered names freely (the
645+
# semantic alias is already importable via adcp.types and
646+
# any local reference the line carries is a usage site).
647+
for old, new in NUMBERED_ASSETS_RENAMES.items():
648+
text_line = _NUMBERED_RENAME_PATTERNS[old].sub(new, text_line)
623649
new_lines.append(text_line)
624650
updated = "".join(new_lines)
625651
needs_write = True
@@ -634,9 +660,7 @@ def run(root: Path, *, apply_changes: bool = False, auto_apply: bool = False) ->
634660
report = Report()
635661
for path in _iter_python_files(root):
636662
report.scanned_files += 1
637-
findings, new_contents = scan_file(
638-
path, apply_changes=apply_changes, auto_apply=auto_apply
639-
)
663+
findings, new_contents = scan_file(path, apply_changes=apply_changes, auto_apply=auto_apply)
640664
for f in findings:
641665
report.add(f)
642666
if new_contents is not None:
@@ -728,9 +752,7 @@ def _format_text_report(report: Report, *, apply_changes: bool, auto_apply: bool
728752
lines.append(f"Rewrote {report.rewritten_files} files in place.")
729753
lines.append("Review with `git diff` before committing.")
730754

731-
if not auto_apply and any(
732-
f.kind in ("flag_private", "flag_numbered") for f in report.flagged
733-
):
755+
if not auto_apply and any(f.kind in ("flag_private", "flag_numbered") for f in report.flagged):
734756
lines.append("")
735757
lines.append(
736758
"Tip: rerun with --auto-apply to mechanically fix the "

tests/test_migrate_v3_to_v4.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -659,8 +659,7 @@ def test_auto_apply_rewrites_all_known_private_import(tmp_path: Path) -> None:
659659
path = _write(
660660
tmp_path,
661661
"code.py",
662-
"from adcp.types.generated_poc.core.context import ContextObject\n"
663-
"x = ContextObject()\n",
662+
"from adcp.types.generated_poc.core.context import ContextObject\n" "x = ContextObject()\n",
664663
)
665664
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
666665

@@ -723,7 +722,8 @@ def test_auto_apply_mixed_line_not_rewritten(tmp_path: Path) -> None:
723722

724723
# Unknown symbol gets a generic flag too (silent-drop bug is fixed).
725724
generic_flags = [
726-
f for f in report.flagged
725+
f
726+
for f in report.flagged
727727
if f.kind == "flag_private" and f.before == "adcp.types.generated_poc"
728728
]
729729
assert len(generic_flags) >= 1
@@ -759,16 +759,15 @@ def test_auto_apply_rewrites_numbered_asset_and_fixes_import_path(tmp_path: Path
759759
path = _write(
760760
tmp_path,
761761
"code.py",
762-
"from adcp.types.generated_poc.core.format import Assets81\n"
763-
"slot: Assets81\n",
762+
"from adcp.types.generated_poc.core.format import Assets81\n" "slot: Assets81\n",
764763
)
765764
v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
766765

767766
rewritten = path.read_text()
768767
assert "Assets81" not in rewritten
769-
assert "adcp.types.generated_poc" not in rewritten, (
770-
"generated_poc import path must be fixed after numbered rename"
771-
)
768+
assert (
769+
"adcp.types.generated_poc" not in rewritten
770+
), "generated_poc import path must be fixed after numbered rename"
772771
assert "from adcp.types import VideoFormatAsset" in rewritten
773772
assert "slot: VideoFormatAsset" in rewritten
774773

@@ -797,8 +796,7 @@ def test_auto_apply_numbered_plus_known_symbol_same_line(tmp_path: Path) -> None
797796
path = _write(
798797
tmp_path,
799798
"code.py",
800-
"from adcp.types.generated_poc.core.x import Assets81, ContextObject\n"
801-
"slot: Assets81\n",
799+
"from adcp.types.generated_poc.core.x import Assets81, ContextObject\n" "slot: Assets81\n",
802800
)
803801
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
804802

@@ -814,6 +812,37 @@ def test_auto_apply_numbered_plus_known_symbol_same_line(tmp_path: Path) -> None
814812
assert not any(f.kind == "flag_private" for f in report.flagged)
815813

816814

815+
def test_auto_apply_mixed_numbered_known_unknown_does_not_corrupt_import(
816+
tmp_path: Path,
817+
) -> None:
818+
"""Regression: a generated_poc import mixing a known numbered asset
819+
(Assets81) with an unknown numbered asset (Assets149) MUST NOT
820+
silently rewrite Assets81 alone — that would leave VideoFormatAsset
821+
imported from a private module path that doesn't export it
822+
(guaranteed ImportError in adopter source)."""
823+
path = _write(
824+
tmp_path,
825+
"code.py",
826+
"from adcp.types.generated_poc.core.format import Assets81, Assets149\n",
827+
)
828+
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
829+
830+
# File content must be untouched on the unsafe-mixed line.
831+
rewritten = path.read_text()
832+
assert "from adcp.types.generated_poc.core.format import Assets81, Assets149" in rewritten
833+
assert "VideoFormatAsset" not in rewritten
834+
835+
# Findings: Assets81 and Assets149 are both flagged for review (the
836+
# adopter has to split the import or wait for Assets149 to land in
837+
# the rename table). Neither is reported as auto_applied.
838+
auto_applied_names = {f.before for f in report.auto_applied}
839+
assert "Assets81" not in auto_applied_names
840+
assert "Assets149" not in auto_applied_names
841+
flagged_names = {f.before for f in report.flagged if f.kind == "flag_numbered"}
842+
assert "Assets81" in flagged_names
843+
assert "Assets149" in flagged_names
844+
845+
817846
# ---------------------------------------------------------------------------
818847
# --auto-apply: combined behaviour + idempotency
819848
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)