diff --git a/libs/openant-core/parsers/go/go_parser/callgraph.go b/libs/openant-core/parsers/go/go_parser/callgraph.go index c3a65375..337ab5dc 100644 --- a/libs/openant-core/parsers/go/go_parser/callgraph.go +++ b/libs/openant-core/parsers/go/go_parser/callgraph.go @@ -214,17 +214,29 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo { func (c *CallGraphBuilder) analyzeCallExpr(call *ast.CallExpr) CallInfo { info := CallInfo{} - switch fun := call.Fun.(type) { + // Unwrap a generic instantiation so fn[T](), fn[K,V](), obj.M[T]() and obj.M[K,V]() are + // analyzed identically to their non-generic forms. A single type argument parses as + // *ast.IndexExpr, multiple as *ast.IndexListExpr; both wrap the underlying function + // expression (an Ident or a SelectorExpr) in .X. + fun := call.Fun + switch idx := fun.(type) { + case *ast.IndexExpr: + fun = idx.X + case *ast.IndexListExpr: + fun = idx.X + } + + switch f := fun.(type) { case *ast.Ident: - // Simple call: funcName() - info.Name = fun.Name + // Simple call: funcName() (or generic Gen[..]()) + info.Name = f.Name case *ast.SelectorExpr: - // Method or package call: obj.Method() or pkg.Func() - info.Name = fun.Sel.Name + // Method or package call: obj.Method() or pkg.Func() (or generic obj.M[..]()) + info.Name = f.Sel.Name info.IsMethod = true - switch x := fun.X.(type) { + switch x := f.X.(type) { case *ast.Ident: info.Receiver = x.Name // Check if it looks like a package (lowercase) or object @@ -241,12 +253,6 @@ func (c *CallGraphBuilder) analyzeCallExpr(call *ast.CallExpr) CallInfo { // Result of another call: getObj().Method() info.Receiver = "~call_result~" } - - case *ast.IndexExpr: - // Generic function call: fn[T]() - if ident, ok := fun.X.(*ast.Ident); ok { - info.Name = ident.Name - } } return info diff --git a/libs/openant-core/parsers/go/go_parser/extractor.go b/libs/openant-core/parsers/go/go_parser/extractor.go index 5f3a1d13..34cf21e7 100644 --- a/libs/openant-core/parsers/go/go_parser/extractor.go +++ b/libs/openant-core/parsers/go/go_parser/extractor.go @@ -72,6 +72,9 @@ func (e *Extractor) extractFromFile(filePath string, output *AnalyzerOutput) err case *ast.FuncDecl: funcInfo := e.extractFunctionDecl(d, relPath, pkgName, content) funcID := e.makeFunctionID(relPath, funcInfo) + if w := duplicateIDWarning(output.Functions, funcID, funcInfo); w != "" { + fmt.Fprintln(os.Stderr, "Warning: "+w) + } output.Functions[funcID] = funcInfo } } @@ -186,6 +189,14 @@ func (e *Extractor) typeToString(expr ast.Expr) string { return "*" + e.typeToString(t.X) case *ast.SelectorExpr: return e.typeToString(t.X) + "." + t.Sel.Name + case *ast.IndexExpr: + // Generic type with one type parameter, e.g. Stack[T]. The class key is the + // base type, so drop the type argument and recurse into the base (t.X). + return e.typeToString(t.X) + case *ast.IndexListExpr: + // Generic type with multiple type parameters, e.g. Pair[K, V]. Same as above: + // the class key is the bare base type, so recurse into t.X and drop the args. + return e.typeToString(t.X) case *ast.ArrayType: if t.Len == nil { return "[]" + e.typeToString(t.Elt) @@ -336,6 +347,21 @@ func (e *Extractor) isMiddleware(params, returns []string, code string) bool { return false } +// duplicateIDWarning returns a non-empty diagnostic when funcID already maps to a unit in fns. +// A collision means two distinct declarations resolved to the same unit id — typically a +// class-key collapse (e.g. an unhandled receiver shape falling back to "unknown") — and the +// subsequent `fns[funcID] = ...` write would silently overwrite the earlier unit (data loss). +// The caller logs the returned string so the collapse is observable instead of silent. An empty +// string means no collision (the common case). +func duplicateIDWarning(fns map[string]FunctionInfo, funcID string, incoming FunctionInfo) string { + existing, ok := fns[funcID] + if !ok { + return "" + } + return fmt.Sprintf("duplicate unit id %q: %s:%d would overwrite %s:%d (class-key collapse?)", + funcID, incoming.FilePath, incoming.StartLine, existing.FilePath, existing.StartLine) +} + func (e *Extractor) makeFunctionID(filePath string, info FunctionInfo) string { if info.ClassName != "" { return fmt.Sprintf("%s:%s.%s", filePath, info.ClassName, info.Name) diff --git a/libs/openant-core/parsers/go/go_parser/extractor_dupguard_test.go b/libs/openant-core/parsers/go/go_parser/extractor_dupguard_test.go new file mode 100644 index 00000000..2f3ecdde --- /dev/null +++ b/libs/openant-core/parsers/go/go_parser/extractor_dupguard_test.go @@ -0,0 +1,39 @@ +package main + +// Guard test for the duplicate-unit-id store hazard (substrate defense-in-depth). +// +// The extractor stores units as `output.Functions[funcID] = info`. If two distinct declarations +// resolve to the same funcID — the failure mode behind the generic-receiver class-key collapse, +// where many methods became "m.go:unknown.Name" — the second write silently overwrites the first +// and a real unit vanishes with no signal. duplicateIDWarning makes that collision observable +// (the caller logs it to stderr) so a future unhandled shape fails loudly instead of silently. + +import ( + "strings" + "testing" +) + +func TestDuplicateIDWarning_NoCollision(t *testing.T) { + fns := map[string]FunctionInfo{} + if w := duplicateIDWarning(fns, "m.go:Stack.Len", FunctionInfo{FilePath: "m.go", StartLine: 10}); w != "" { + t.Errorf("expected no warning for a fresh id, got %q", w) + } + // A different id sharing the map must also not warn. + fns["m.go:Stack.Len"] = FunctionInfo{FilePath: "m.go", StartLine: 10} + if w := duplicateIDWarning(fns, "m.go:Queue.Len", FunctionInfo{FilePath: "m.go", StartLine: 20}); w != "" { + t.Errorf("expected no warning for a distinct id, got %q", w) + } +} + +func TestDuplicateIDWarning_Collision(t *testing.T) { + fns := map[string]FunctionInfo{ + "m.go:unknown.Len": {FilePath: "m.go", StartLine: 10}, + } + w := duplicateIDWarning(fns, "m.go:unknown.Len", FunctionInfo{FilePath: "m.go", StartLine: 20}) + if w == "" { + t.Fatal("expected a warning when funcID already present (silent overwrite), got none") + } + if !strings.Contains(w, "m.go:unknown.Len") { + t.Errorf("warning should name the colliding id; got %q", w) + } +} diff --git a/libs/openant-core/parsers/go/go_parser/extractor_generic_test.go b/libs/openant-core/parsers/go/go_parser/extractor_generic_test.go new file mode 100644 index 00000000..86cab48f --- /dev/null +++ b/libs/openant-core/parsers/go/go_parser/extractor_generic_test.go @@ -0,0 +1,142 @@ +package main + +// Regression tests for the generic-type key-collapse bug family in the Go parser. +// +// Bug 1 (extractor.go typeToString): a method on a generic type — `func (s *Stack[T]) Push()` — +// parses with an *ast.IndexExpr (one type param) or *ast.IndexListExpr (multiple) inside the +// receiver. typeToString had no case for either, so it fell to `default: return "unknown"`, +// making className "unknown". Every generic-type method across a repo then collapsed onto the +// fake class `unknown`, and DISTINCT types' same-named methods collided onto one unit id — +// silent data loss via map overwrite (output.Functions[id] = info). +// +// Bug 2 (callgraph.go analyzeCallExpr): generic CALL sites were dropped — `Gen[K,V]()` +// (*ast.IndexListExpr) had no case, and `obj.M[T]()` (*ast.IndexExpr whose .X is a +// *ast.SelectorExpr) was rejected by the `fun.X.(*ast.Ident)` guard — losing call edges. +// +// These tests exercise the stable public boundaries (Extract / extractCalls) so they compile +// against both pre-fix and post-fix sources; RED is demonstrated by running pre-fix. + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// writeTempGo writes src to /m.go and returns the path; the relPath from the repo root +// (dir) is therefore "m.go", so unit ids are "m.go:.". +func writeTempGo(t *testing.T, src string) (repo, file string) { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "m.go") + if err := os.WriteFile(path, []byte(src), 0o644); err != nil { + t.Fatalf("write temp go: %v", err) + } + return dir, path +} + +func extractIDs(t *testing.T, src string) map[string]FunctionInfo { + t.Helper() + repo, file := writeTempGo(t, src) + out, err := NewExtractor(repo).Extract([]string{file}) + if err != nil { + t.Fatalf("Extract: %v", err) + } + return out.Functions +} + +// Bug 1: generic receivers must key under the bare base type, never "unknown". +func TestGenericReceiver_ClassNameIsBaseType(t *testing.T) { + src := `package main + +type Stack[T any] struct{ items []T } +type Pair[K any, V any] struct{ k K; v V } +type Plain struct{} + +func (s *Stack[T]) Push(x T) {} // *ast.StarExpr -> *ast.IndexExpr +func (s Stack[T]) PeekVal() (z T) { return }// value receiver: *ast.IndexExpr directly +func (p *Pair[K, V]) Key() (k K) { return }// *ast.StarExpr -> *ast.IndexListExpr +func (p *Plain) Do() {} // control: *ast.Ident +` + fns := extractIDs(t, src) + want := []string{ + "m.go:Stack.Push", + "m.go:Stack.PeekVal", + "m.go:Pair.Key", + "m.go:Plain.Do", + } + for _, id := range want { + if _, ok := fns[id]; !ok { + t.Errorf("missing unit id %q; got ids %v", id, keys(fns)) + } + } + for id := range fns { + if strings.Contains(id, "unknown") { + t.Errorf("unit id %q contains the bogus class key 'unknown'", id) + } + } +} + +// Bug 1 (deeper, the data-loss regression): two DISTINCT generic types with a same-named method +// must produce two distinct units — pre-fix both collapsed to "m.go:unknown.Len" and the map +// overwrite silently dropped one. +func TestGenericTypes_NoUnitCollision(t *testing.T) { + src := `package main + +type Stack[T any] struct{} +type Queue[T any] struct{} + +func (s *Stack[T]) Len() int { return 0 } +func (q *Queue[T]) Len() int { return 1 } +` + fns := extractIDs(t, src) + if _, ok := fns["m.go:Stack.Len"]; !ok { + t.Errorf("missing m.go:Stack.Len; got %v", keys(fns)) + } + if _, ok := fns["m.go:Queue.Len"]; !ok { + t.Errorf("missing m.go:Queue.Len (silent collision/data-loss); got %v", keys(fns)) + } + if len(fns) != 2 { + t.Errorf("expected exactly 2 distinct method units, got %d: %v", len(fns), keys(fns)) + } +} + +// Bug 2: generic call sites must recover the called name (and receiver for generic methods). +func TestCallgraph_GenericCalls(t *testing.T) { + c := NewCallGraphBuilder("/repo") + fi := FunctionInfo{ + Name: "Run", + FilePath: "a/a.go", + Code: "func Run(){ " + + "Gen[int](); " + // *ast.IndexExpr{Ident} — already worked (control) + "Gen2[int,string](); " + // *ast.IndexListExpr — was dropped + "o.M[int](); " + // *ast.IndexExpr{SelectorExpr} — was dropped + "o.N[int,bool]() " + // *ast.IndexListExpr{SelectorExpr} — was dropped + "}", + } + calls := c.extractCalls(fi) + got := map[string]CallInfo{} + for _, ci := range calls { + got[ci.Name] = ci + } + for _, name := range []string{"Gen", "Gen2", "M", "N"} { + if _, ok := got[name]; !ok { + t.Errorf("generic call %q not recovered; got calls %+v", name, calls) + } + } + // generic METHOD calls must also recover the receiver + if ci, ok := got["M"]; ok && ci.Receiver != "o" { + t.Errorf("o.M[int](): expected receiver 'o', got %q", ci.Receiver) + } + if ci, ok := got["N"]; ok && ci.Receiver != "o" { + t.Errorf("o.N[int,bool](): expected receiver 'o', got %q", ci.Receiver) + } +} + +func keys(m map[string]FunctionInfo) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + return out +} diff --git a/libs/openant-core/parsers/go/go_parser/go_parser b/libs/openant-core/parsers/go/go_parser/go_parser deleted file mode 100755 index 198b846c..00000000 Binary files a/libs/openant-core/parsers/go/go_parser/go_parser and /dev/null differ