diff --git a/libs/openant-core/parsers/php/function_extractor.py b/libs/openant-core/parsers/php/function_extractor.py index 582a689..236e06a 100644 --- a/libs/openant-core/parsers/php/function_extractor.py +++ b/libs/openant-core/parsers/php/function_extractor.py @@ -437,6 +437,52 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path, stack.append((child, class_name, new_namespace_name)) continue # Don't walk children again + elif node.type == 'anonymous_class': + # `new class { ... }` (PHP 7+) has no source name. Without a synthetic + # identity its methods fall through the catch-all else with the OUTER + # class_name (None at top level), so they're keyed as bare functions and + # two distinct anonymous classes that both define e.g. handle() collide on + # one id (the later silently overwrites the earlier). Synthesize a stable, + # location-based name so each anonymous class is distinct and its methods + # are qualified (class@anonymous:LINE:COL.method). Line AND column are + # both needed: two `new class {}` on one physical line share a start line, + # so column is what keeps them distinct (else they'd still collide). 
+ anon_name = ( + f"class@anonymous:{node.start_point[0] + 1}:{node.start_point[1]}" + ) + body_node = None + for child in node.children: + if child.type == 'declaration_list': + body_node = child + break + + if body_node: + methods = [] + for child in body_node.children: + if child.type == 'method_declaration': + mname = self._get_function_name(child, source) + if mname: + if self._is_static_method(child, source): + methods.append(f"static:{mname}") + else: + methods.append(mname) + + self.classes[f"{relative_path}:{anon_name}"] = { + 'name': anon_name, + 'file_path': relative_path, + 'start_line': node.start_point[0] + 1, + 'end_line': node.end_point[0] + 1, + 'methods': methods, + 'superclass': None, + 'interfaces': [], + 'namespace_name': namespace_name, + } + self.stats['total_classes'] += 1 + + for child in reversed(body_node.children): + stack.append((child, anon_name, namespace_name)) + continue # Don't walk children again + else: for child in reversed(node.children): stack.append((child, class_name, namespace_name)) diff --git a/libs/openant-core/parsers/zig/function_extractor.py b/libs/openant-core/parsers/zig/function_extractor.py index 7e5c2f5..10b7521 100644 --- a/libs/openant-core/parsers/zig/function_extractor.py +++ b/libs/openant-core/parsers/zig/function_extractor.py @@ -131,6 +131,15 @@ def _walk_node( if func_info: func_id = f"{file_path}:{func_info['qualified_name']}" functions[func_id] = func_info + # Zig's generic-container idiom is a type-returning function: + # `fn List(comptime T: type) type { return struct { fn push() ... }; }`. + # The returned struct is anonymous in the AST (not a `const Name = + # struct {...}` variable_declaration), so without this its methods would + # recurse with current_struct unchanged and be emitted as bare top-level + # functions. Thread the function name as the struct context so they + # qualify as List.push and distinct containers' methods don't collide. 
+ if self._returns_type(node, source): + child_struct = func_info["name"] elif node.type == "variable_declaration": # `const Foo = struct { ... };` -- a named struct/enum definition. @@ -206,6 +215,23 @@ def _extract_function( "unit_type": unit_type, } + def _returns_type(self, node: Node, source: bytes) -> bool: + """True if a function_declaration's return type is the builtin `type` — Zig's + generic-container idiom (`fn Foo(...) type { return struct {...} }`). + + The return type is the function_declaration's direct child that follows the + `parameters` node (a `builtin_type`). This deliberately inspects only direct + children, so the `type` inside a `comptime T: type` parameter (nested under + `parameters`) is not mistaken for the return type. + """ + seen_params = False + for child in node.children: + if child.type in ("parameters", "ParamDeclList"): + seen_params = True + elif seen_params and child.type == "builtin_type": + return self._get_node_text(child, source).strip() == "type" + return False + def _extract_parameters(self, node: Node, source: bytes) -> List[str]: """Extract parameter names from a parameter list node.""" params = [] diff --git a/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py new file mode 100644 index 0000000..5fc1418 --- /dev/null +++ b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py @@ -0,0 +1,84 @@ +"""Regression test for the PHP anonymous-class method-attribution bug. + +`new class { ... }` (PHP 7+) produces a tree-sitter `anonymous_class` node, which had +no handler in _extract_functions_from_tree and fell through the catch-all `else` — so +its methods were emitted with class_name=None (bare top-level functions). Two distinct +anonymous classes that both define e.g. handle() then collided on one unit id and the +later silently overwrote the earlier (data loss). 
+ +Driven through the REAL extractor (FunctionExtractor.extract_all) on a temp .php file. + +DEPENDENCY (human reviewers + agents): this fix assumes the reworked +`_extract_functions_from_tree` traversal added by upstream PR #111 (PHP parser). On raw +`master` the PHP extractor has a materially different shape and these tests fail — this +change is NOT landable on master standalone. Depends-on: #111. Base this on +staging/parser-fix-stack (which already contains #111) to run it green. +""" + +import os +import sys +import tempfile +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.php.function_extractor import FunctionExtractor + + +def _extract(php_source: str, filename: str = "anon.php") -> dict: + repo = tempfile.mkdtemp() + with open(os.path.join(repo, filename), "w") as fh: + fh.write(php_source) + return FunctionExtractor(repo).extract_all([filename]) + + +def test_anon_class_method_attributed_to_synthetic_class(): + src = ( + " dict: + workdir = tempfile.mkdtemp() + with open(os.path.join(workdir, "m.zig"), "w") as fh: + fh.write(src) + return FunctionExtractor(workdir, {"files": [{"path": "m.zig"}]}).extract() + + +def _zig_parser_is_grammar_aligned() -> bool: + """Probe the PREREQUISITE behavior (not this fix's): does a *named* struct's method + extract as Container.method? That capability is provided by the tree-sitter-zig + grammar-alignment work (>=1.1.2 node names struct_declaration/variable_declaration; + PRs 87/110, commit 322920e), independent of the generic-container fix under test. 
+ On a base whose parser still matches stale node names (VarDecl/container_decl), no + struct methods extract at all, so these tests cannot pass for reasons unrelated to + the fix.""" + probe = "const _Probe = struct {\n pub fn _m(self: _Probe) void { _ = self; }\n};\n" + return "m.zig:_Probe._m" in _extract(probe)["functions"] + + +# Skip (not fail) with an explanatory message when run on a base that lacks the +# grammar-alignment prerequisite — so a human or agent running this on raw master sees +# *why* instead of a cryptic assertion failure. Supported base: staging/parser-fix-stack, +# which carries upstream PR #110 (Zig parser realignment) AND the tree-sitter-zig>=1.1.2 +# grammar pin. This is NOT landable on master standalone. +pytestmark = pytest.mark.skipif( + not _zig_parser_is_grammar_aligned(), + reason=( + "Zig parser not grammar-aligned (needs tree-sitter-zig>=1.1.2 node names " + "struct_declaration/variable_declaration, from upstream PR #110 + the grammar " + "pin). On such a base no struct methods extract, so the generic-container fix " + "cannot pass. Supported base: staging/parser-fix-stack — not landable on master." + ), +) + + +def test_generic_container_method_qualified_to_container(): + src = ( + "pub fn List(comptime T: type) type {\n" + " return struct {\n" + " pub fn push(self: *@This(), x: T) void { _ = self; _ = x; }\n" + " };\n" + "}\n" + "fn ordinary() void {}\n" + ) + out = _extract(src) + funcs = out["functions"] + assert "m.zig:List.push" in funcs, f"List.push missing; keys = {sorted(funcs)}" + info = funcs["m.zig:List.push"] + assert info["class_name"] == "List" + assert info["qualified_name"] == "List.push" + assert info["unit_type"] == "method" + # The method must NOT leak as a bare top-level function. + assert "m.zig:push" not in funcs, f"unqualified push leaked: {sorted(funcs)}" + # The plain function is unaffected. 
+ assert "m.zig:ordinary" in funcs, sorted(funcs) + + +def test_two_generic_containers_methods_no_collision(): + src = ( + "pub fn List(comptime T: type) type {\n" + " return struct { pub fn len(self: *@This()) usize { _ = self; return 0; } };\n" + "}\n" + "pub fn Ring(comptime T: type) type {\n" + " return struct { pub fn len(self: *@This()) usize { _ = self; return 1; } };\n" + "}\n" + ) + funcs = _extract(src)["functions"] + assert "m.zig:List.len" in funcs, f"keys = {sorted(funcs)}" + assert "m.zig:Ring.len" in funcs, f"silent collision/data-loss; keys = {sorted(funcs)}"