Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions libs/openant-core/parsers/php/function_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,52 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path,
stack.append((child, class_name, new_namespace_name))
continue # Don't walk children again

elif node.type == 'anonymous_class':
# `new class { ... }` (PHP 7+) has no source name. Without a synthetic
# identity its methods fall through the catch-all else with the OUTER
# class_name (None at top level), so they're keyed as bare functions and
# two distinct anonymous classes that both define e.g. handle() collide on
# one id (the later silently overwrites the earlier). Synthesize a stable,
# location-based name so each anonymous class is distinct and its methods
# are qualified (class@anonymous:<line>:<col>.method). Line AND column are
# both needed: two `new class {}` on one physical line share a start line,
# so column is what keeps them distinct (else they'd still collide).
anon_name = (
f"class@anonymous:{node.start_point[0] + 1}:{node.start_point[1]}"
)
body_node = None
for child in node.children:
if child.type == 'declaration_list':
body_node = child
break

if body_node:
methods = []
for child in body_node.children:
if child.type == 'method_declaration':
mname = self._get_function_name(child, source)
if mname:
if self._is_static_method(child, source):
methods.append(f"static:{mname}")
else:
methods.append(mname)

self.classes[f"{relative_path}:{anon_name}"] = {
'name': anon_name,
'file_path': relative_path,
'start_line': node.start_point[0] + 1,
'end_line': node.end_point[0] + 1,
'methods': methods,
'superclass': None,
'interfaces': [],
'namespace_name': namespace_name,
}
self.stats['total_classes'] += 1

for child in reversed(body_node.children):
stack.append((child, anon_name, namespace_name))
continue # Don't walk children again

else:
for child in reversed(node.children):
stack.append((child, class_name, namespace_name))
Expand Down
26 changes: 26 additions & 0 deletions libs/openant-core/parsers/zig/function_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ def _walk_node(
if func_info:
func_id = f"{file_path}:{func_info['qualified_name']}"
functions[func_id] = func_info
# Zig's generic-container idiom is a type-returning function:
# `fn List(comptime T: type) type { return struct { fn push() ... }; }`.
# The returned struct is anonymous in the AST (not a `const Name =
# struct {...}` variable_declaration), so without this its methods would
# recurse with current_struct unchanged and be emitted as bare top-level
# functions. Thread the function name as the struct context so they
# qualify as List.push and distinct containers' methods don't collide.
if self._returns_type(node, source):
child_struct = func_info["name"]

elif node.type == "variable_declaration":
# `const Foo = struct { ... };` -- a named struct/enum definition.
Expand Down Expand Up @@ -206,6 +215,23 @@ def _extract_function(
"unit_type": unit_type,
}

def _returns_type(self, node: Node, source: bytes) -> bool:
    """Report whether a function_declaration's return type is the builtin `type`.

    This is how Zig's generic-container idiom is recognised:
    `fn Foo(...) type { return struct {...} }`.

    Only the direct children of ``node`` are examined. The scan waits until it has
    passed the parameter list (`parameters` or `ParamDeclList`) and then lets the
    first `builtin_type` child decide the answer. Because nested nodes are never
    visited, the `type` in a `comptime T: type` parameter (which lives under the
    parameter list) cannot be mistaken for the return type.

    Args:
        node: The function_declaration node to inspect.
        source: Source bytes handed to ``_get_node_text`` to read the node text.

    Returns:
        True when the first `builtin_type` child after the parameter list reads
        `type` once surrounding whitespace is stripped; False otherwise, including
        when no such child exists.
    """
    past_params = False
    for part in node.children:
        kind = part.type
        if kind == "parameters" or kind == "ParamDeclList":
            past_params = True
            continue
        if not past_params or kind != "builtin_type":
            # Either still ahead of the parameter list, or not a builtin type node.
            continue
        return self._get_node_text(part, source).strip() == "type"
    return False

def _extract_parameters(self, node: Node, source: bytes) -> List[str]:
"""Extract parameter names from a parameter list node."""
params = []
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Regression test for the PHP anonymous-class method-attribution bug.

`new class { ... }` (PHP 7+) produces a tree-sitter `anonymous_class` node, which had
no handler in _extract_functions_from_tree and fell through the catch-all `else` — so
its methods were emitted with class_name=None (bare top-level functions). Two distinct
anonymous classes that both define e.g. handle() then collided on one unit id and the
later silently overwrote the earlier (data loss).

Driven through the REAL extractor (FunctionExtractor.extract_all) on a temp .php file.

DEPENDENCY (human reviewers + agents): this fix assumes the reworked
`_extract_functions_from_tree` traversal added by upstream PR #111 (PHP parser). On raw
`master` the PHP extractor has a materially different shape and these tests fail — this
change is NOT landable on master standalone. Depends-on: #111. Base this on
staging/parser-fix-stack (which already contains #111) to run it green.
"""

import os
import sys
import tempfile
from pathlib import Path

_CORE_ROOT = Path(__file__).resolve().parents[3]
sys.path.insert(0, str(_CORE_ROOT))

from parsers.php.function_extractor import FunctionExtractor


def _extract(php_source: str, filename: str = "anon.php") -> dict:
    """Run the real PHP ``FunctionExtractor`` over ``php_source`` and return its result.

    The source is written to ``filename`` inside a throwaway directory that is
    passed to the extractor as the repository root. The directory is deleted as
    soon as extraction returns, so repeated test runs no longer leave one temp
    directory behind per call.

    Args:
        php_source: Complete PHP file content to analyse.
        filename: Name of the file created inside the temporary repository; it is
            also the single path handed to ``extract_all``.

    Returns:
        Whatever ``FunctionExtractor.extract_all`` returns; the tests in this
        module index it with ``["functions"]``.
    """
    with tempfile.TemporaryDirectory() as repo:
        # Explicit UTF-8 keeps the fixture bytes independent of the platform locale.
        with open(os.path.join(repo, filename), "w", encoding="utf-8") as fh:
            fh.write(php_source)
        # NOTE(review): assumes extract_all reads the file eagerly, i.e. nothing in
        # the returned dict needs the directory to still exist — confirm.
        return FunctionExtractor(repo).extract_all([filename])


def test_anon_class_method_attributed_to_synthetic_class():
    """A method inside `new class { ... }` is owned by a synthetic anonymous class."""
    php_code = (
        "<?php\n"
        "function make() {\n"
        " return new class {\n"
        " public function handle() { return 1; }\n"
        " };\n"
        "}\n"
    )
    funcs = _extract(php_code)["functions"]

    matches = []
    for unit in funcs.values():
        if unit["name"] == "handle":
            matches.append(unit)
    assert len(matches) == 1, f"expected one handle unit; got {sorted(funcs)}"

    (info,) = matches
    owner = info["class_name"]
    # handle() must hang off a non-empty synthetic anonymous-class identity rather
    # than being emitted as a bare top-level function.
    assert owner, f"handle has no class_name: {info}"
    assert owner.startswith("class@anonymous"), info["class_name"]
    assert info["qualified_name"].endswith(".handle"), info["qualified_name"]

    # The enclosing named function make() must still be extracted.
    unit_names = {unit["name"] for unit in funcs.values()}
    assert "make" in unit_names, sorted(funcs)


def test_two_anon_classes_same_method_no_collision():
    """Two anonymous classes on different lines keep separate handle() units."""
    php_code = (
        "<?php\n"
        "function a() { return new class { public function handle() { return 1; } }; }\n"
        "function b() { return new class { public function handle() { return 2; } }; }\n"
    )
    funcs = _extract(php_code)["functions"]

    handle_ids = []
    for unit_id, unit in funcs.items():
        if unit["name"] == "handle":
            handle_ids.append(unit_id)

    # Both methods must survive extraction, each under its own id.
    assert len(handle_ids) == 2, (
        f"two distinct anon-class handle() must not collide; got {handle_ids} "
        f"(all keys: {sorted(funcs)})"
    )
    assert len(set(handle_ids)) == 2, f"duplicate ids: {handle_ids}"


def test_two_anon_classes_same_line_no_collision():
    """Two anonymous classes on one physical line keep separate handle() units.

    Both `new class {}` expressions share a start line, so the synthetic class id
    has to include the column as well; otherwise the ids collide and one method is
    silently lost.
    """
    php_code = (
        "<?php\n"
        "$a = new class { public function handle() { return 1; } }; "
        "$b = new class { public function handle() { return 2; } };\n"
    )
    funcs = _extract(php_code)["functions"]
    handle_ids = [unit_id for unit_id in funcs if funcs[unit_id]["name"] == "handle"]
    assert len(handle_ids) == 2, f"same-line anon classes collided; got {handle_ids}"
    assert len(set(handle_ids)) == 2, f"duplicate ids: {handle_ids}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Regression test for the Zig generic-container method-attribution bug.

Zig's idiomatic generic container is a type-returning function:
pub fn List(comptime T: type) type { return struct { pub fn push(...) ... }; }
The returned struct is anonymous in the AST (a `struct_declaration` reached via
`return_expression`), NOT a `const Name = struct {...}` (`variable_declaration`).
The walker only threaded struct context for the variable_declaration form, so methods
inside a type-returning container were emitted as bare top-level functions with
class_name=None — and two distinct containers' same-named methods collided on one id.

Driven through the REAL extractor (FunctionExtractor.extract()) on a temp .zig file.
"""

import os
import sys
import tempfile
from pathlib import Path

import pytest

_CORE_ROOT = Path(__file__).resolve().parents[3]
sys.path.insert(0, str(_CORE_ROOT))

from parsers.zig.function_extractor import FunctionExtractor


def _extract(src: str) -> dict:
    """Run the real Zig ``FunctionExtractor`` over ``src`` and return its result.

    The source is written to ``m.zig`` inside a throwaway working directory, and
    the extractor is given that directory plus a one-entry file listing. The
    directory is deleted as soon as extraction returns. This helper also runs at
    import time (via the module-level skip probe), so without cleanup every test
    collection would leave a temp directory behind.

    Args:
        src: Complete Zig file content to analyse.

    Returns:
        Whatever ``FunctionExtractor.extract`` returns; the tests in this module
        index it with ``["functions"]``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        # Explicit UTF-8 keeps the fixture bytes independent of the platform locale.
        with open(os.path.join(workdir, "m.zig"), "w", encoding="utf-8") as fh:
            fh.write(src)
        # NOTE(review): assumes extract() reads the file eagerly, i.e. nothing in
        # the returned dict needs the directory to still exist — confirm.
        return FunctionExtractor(workdir, {"files": [{"path": "m.zig"}]}).extract()


def _zig_parser_is_grammar_aligned() -> bool:
    """Check the PREREQUISITE capability these tests rely on, not the fix itself.

    Extracts a tiny *named* struct with one method and reports whether that method
    comes back as ``m.zig:_Probe._m``. Qualifying a named struct's methods is
    provided by the tree-sitter-zig grammar-alignment work (>=1.1.2 node names
    struct_declaration/variable_declaration; PRs 87/110, commit 322920e) and is
    independent of the generic-container fix under test. On a base whose parser
    still matches the stale node names (VarDecl/container_decl) no struct methods
    extract at all, so the tests below would fail for reasons unrelated to the fix.
    """
    probe_source = "const _Probe = struct {\n pub fn _m(self: _Probe) void { _ = self; }\n};\n"
    extracted = _extract(probe_source)["functions"]
    return "m.zig:_Probe._m" in extracted


# When the grammar-alignment prerequisite is missing, skip the whole module with an
# explanation rather than failing. Anyone (human or agent) running this on raw master
# then sees *why* instead of a cryptic assertion failure. Supported base:
# staging/parser-fix-stack, which carries upstream PR #110 (Zig parser realignment)
# AND the tree-sitter-zig>=1.1.2 grammar pin. This is NOT landable on master standalone.
_SKIP_REASON = (
    "Zig parser not grammar-aligned (needs tree-sitter-zig>=1.1.2 node names "
    "struct_declaration/variable_declaration, from upstream PR #110 + the grammar "
    "pin). On such a base no struct methods extract, so the generic-container fix "
    "cannot pass. Supported base: staging/parser-fix-stack — not landable on master."
)
pytestmark = pytest.mark.skipif(not _zig_parser_is_grammar_aligned(), reason=_SKIP_REASON)


def test_generic_container_method_qualified_to_container():
    """A method of a type-returning function's struct is qualified by that function."""
    zig_code = (
        "pub fn List(comptime T: type) type {\n"
        " return struct {\n"
        " pub fn push(self: *@This(), x: T) void { _ = self; _ = x; }\n"
        " };\n"
        "}\n"
        "fn ordinary() void {}\n"
    )
    funcs = _extract(zig_code)["functions"]

    push_id = "m.zig:List.push"
    assert push_id in funcs, f"List.push missing; keys = {sorted(funcs)}"

    info = funcs[push_id]
    expected_fields = {
        "class_name": "List",
        "qualified_name": "List.push",
        "unit_type": "method",
    }
    for field, wanted in expected_fields.items():
        assert info[field] == wanted

    # push must not also appear as a bare top-level function.
    assert "m.zig:push" not in funcs, f"unqualified push leaked: {sorted(funcs)}"
    # The ordinary free function is still extracted under its plain name.
    assert "m.zig:ordinary" in funcs, sorted(funcs)


def test_two_generic_containers_methods_no_collision():
    """Same-named methods of two generic containers get distinct, qualified ids."""
    zig_code = (
        "pub fn List(comptime T: type) type {\n"
        " return struct { pub fn len(self: *@This()) usize { _ = self; return 0; } };\n"
        "}\n"
        "pub fn Ring(comptime T: type) type {\n"
        " return struct { pub fn len(self: *@This()) usize { _ = self; return 1; } };\n"
        "}\n"
    )
    funcs = _extract(zig_code)["functions"]
    # Each container must contribute its own len(); neither may overwrite the other.
    assert "m.zig:List.len" in funcs, f"keys = {sorted(funcs)}"
    assert "m.zig:Ring.len" in funcs, f"silent collision/data-loss; keys = {sorted(funcs)}"
Loading