diff --git a/code_review_graph/flows.py b/code_review_graph/flows.py index fd12ade6..4bd3c1ee 100644 --- a/code_review_graph/flows.py +++ b/code_review_graph/flows.py @@ -655,6 +655,55 @@ def get_flow_by_id(store: GraphStore, flow_id: int) -> Optional[dict]: } +def summarize_flow(flow: dict) -> dict: + """Return a compact summary of a flow for token-efficient output. + + Produces a narrative paragraph and structured bullet fields from a full + flow dict (as returned by :func:`get_flow_by_id`). Agents can use this + instead of the raw step list when they only need to know which flows are + affected and how critical they are. + + Returns a dict with: + - ``flow_id``: database ID + - ``name``: entry point name + - ``entry_point``: qualified name of the entry point + - ``criticality``: float score (0.0-1.0) + - ``depth``: BFS depth of the flow + - ``node_count``: number of nodes in the path + - ``file_count``: number of distinct files touched + - ``summary``: one-paragraph narrative description + """ + flow_id = flow.get("id") + name = flow.get("name", "unknown") + entry_point = flow.get("entry_point", "") + criticality = flow.get("criticality", 0.0) + depth = flow.get("depth", 0) + node_count = flow.get("node_count", 0) + file_count = flow.get("file_count", 0) + + risk_label = ( + "high" if criticality >= 0.7 + else "medium" if criticality >= 0.4 + else "low" + ) + summary = ( + f"Flow '{name}' (entry: {entry_point}) has {risk_label} criticality " + f"({criticality:.2f}): {node_count} nodes across {file_count} file(s) " + f"at depth {depth}." 
+ ) + + return { + "flow_id": flow_id, + "name": name, + "entry_point": entry_point, + "criticality": criticality, + "depth": depth, + "node_count": node_count, + "file_count": file_count, + "summary": summary, + } + + def get_affected_flows( store: GraphStore, changed_files: list[str], diff --git a/code_review_graph/main.py b/code_review_graph/main.py index 8f4fdbe8..12f1ffe6 100644 --- a/code_review_graph/main.py +++ b/code_review_graph/main.py @@ -43,6 +43,7 @@ get_impact_radius, get_knowledge_gaps_func, get_minimal_context, + get_review_bundle, get_review_context, get_suggested_questions_func, get_surprising_connections_func, @@ -355,6 +356,7 @@ async def embed_graph_tool( @mcp.tool() def list_graph_stats_tool( repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Get aggregate statistics about the code knowledge graph. @@ -363,8 +365,10 @@ def list_graph_stats_tool( Args: repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns breakdown by kind; "minimal" returns + total_nodes, total_edges, files_count, and last_updated only. """ - return list_graph_stats(repo_root=_resolve_repo_root(repo_root)) + return list_graph_stats(repo_root=_resolve_repo_root(repo_root), detail_level=detail_level) @mcp.tool() @@ -397,6 +401,7 @@ def find_large_functions_tool( file_path_pattern: Optional[str] = None, limit: int = 50, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Find functions, classes, or files exceeding a line-count threshold. @@ -409,10 +414,12 @@ def find_large_functions_tool( file_path_pattern: Filter by file path substring (e.g. "components/"). limit: Maximum results. Default: 50. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full node data, "minimal" for + name/kind/line_count/relative_path only. 
""" return find_large_functions( min_lines=min_lines, kind=kind, file_path_pattern=file_path_pattern, - limit=limit, repo_root=_resolve_repo_root(repo_root), + limit=limit, repo_root=_resolve_repo_root(repo_root), detail_level=detail_level, ) @@ -475,6 +482,7 @@ def get_affected_flows_tool( changed_files: Optional[list[str]] = None, base: str = "HEAD~1", repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Find execution flows affected by changed files. @@ -486,9 +494,12 @@ def get_affected_flows_tool( changed_files: List of changed file paths (relative to repo root). Auto-detected if omitted. base: Git ref for auto-detecting changes. Default: HEAD~1. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns raw flow data; "minimal" returns a + narrative summary with bullet IDs — raw steps moved to a "detail" key. """ return get_affected_flows_func( - changed_files=changed_files, base=base, repo_root=_resolve_repo_root(repo_root), + changed_files=changed_files, base=base, + repo_root=_resolve_repo_root(repo_root), detail_level=detail_level, ) @@ -525,6 +536,7 @@ def get_community_tool( community_id: Optional[int] = None, include_members: bool = False, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Get detailed information about a single code community. @@ -539,16 +551,19 @@ def get_community_tool( community_id: Database ID of the community. include_members: Include full member node details. Default: False. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full metadata, "minimal" for name/size/cohesion/language. 
""" return get_community_func( community_name=community_name, community_id=community_id, include_members=include_members, repo_root=_resolve_repo_root(repo_root), + detail_level=detail_level, ) @mcp.tool() def get_architecture_overview_tool( repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Generate an architecture overview based on community structure. @@ -558,8 +573,12 @@ def get_architecture_overview_tool( Args: repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full overview with edges, "minimal" for + community names, counts, and warnings only. """ - return get_architecture_overview_func(repo_root=_resolve_repo_root(repo_root)) + return get_architecture_overview_func( + repo_root=_resolve_repo_root(repo_root), detail_level=detail_level, + ) @mcp.tool() @@ -598,6 +617,43 @@ async def detect_changes_tool( ) +@mcp.tool() +async def get_review_bundle_tool( + base: str = "HEAD~1", + changed_files: Optional[list[str]] = None, + max_nodes: int = 20, + include_snippets: bool = True, + repo_root: Optional[str] = None, +) -> dict: + """Return a complete review package in a single round-trip. + + Collapses the typical 2-4 tool call review workflow into one response. + Combines risk-scored change analysis, one-hop caller/callee subgraph + for changed symbols, affected flow summaries, and a structured checklist. + + Use this instead of calling detect_changes + get_affected_flows + query_graph + separately — saves 60-80% of review round-trips. + + Offloaded to a thread via ``asyncio.to_thread`` — runs git diff subprocesses + and BFS traversals that can take several seconds on large repos. + + Args: + base: Git ref to diff against. Default: HEAD~1. + changed_files: List of changed file paths (relative to repo root). Auto-detected if omitted. + max_nodes: Maximum neighbour nodes in the caller/callee subgraph. Default: 20. + include_snippets: Include source code snippets for changed functions. Default: True. 
+ repo_root: Repository root path. Auto-detected if omitted. + """ + return await asyncio.to_thread( + get_review_bundle, + base=base, + changed_files=changed_files, + max_nodes=max_nodes, + include_snippets=include_snippets, + repo_root=_resolve_repo_root(repo_root), + ) + + @mcp.tool() def refactor_tool( mode: str = "rename", @@ -606,6 +662,7 @@ def refactor_tool( kind: Optional[str] = None, file_pattern: Optional[str] = None, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Graph-powered refactoring operations. @@ -627,10 +684,13 @@ def refactor_tool( kind: (dead_code) Optional filter: Function or Class. file_pattern: (dead_code) Filter by file path substring. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full results, "minimal" caps dead_code/suggest + to 10 items (name/kind/file_path or name/kind/suggestion). """ return refactor_func( mode=mode, old_name=old_name, new_name=new_name, kind=kind, file_pattern=file_pattern, repo_root=_resolve_repo_root(repo_root), + detail_level=detail_level, ) @@ -713,6 +773,7 @@ def get_wiki_page_tool( def get_hub_nodes_tool( top_n: int = 10, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Find the most connected nodes in the codebase (architectural hotspots). @@ -722,9 +783,10 @@ def get_hub_nodes_tool( Args: top_n: Number of top hubs to return. Default: 10. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full hub data, "minimal" for name/kind/degree only. """ return get_hub_nodes_func( - repo_root=_resolve_repo_root(repo_root), top_n=top_n, + repo_root=_resolve_repo_root(repo_root), top_n=top_n, detail_level=detail_level, ) @@ -732,6 +794,7 @@ def get_hub_nodes_tool( def get_bridge_nodes_tool( top_n: int = 10, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Find architectural chokepoints via betweenness centrality. 
@@ -742,15 +805,17 @@ def get_bridge_nodes_tool( Args: top_n: Number of top bridges to return. Default: 10. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full data, "minimal" for name/kind/centrality only. """ return get_bridge_nodes_func( - repo_root=_resolve_repo_root(repo_root), top_n=top_n, + repo_root=_resolve_repo_root(repo_root), top_n=top_n, detail_level=detail_level, ) @mcp.tool() def get_knowledge_gaps_tool( repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Identify structural weaknesses in the codebase graph. @@ -760,9 +825,10 @@ def get_knowledge_gaps_tool( Args: repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full gap details, "minimal" for counts only. """ return get_knowledge_gaps_func( - repo_root=_resolve_repo_root(repo_root), + repo_root=_resolve_repo_root(repo_root), detail_level=detail_level, ) @@ -770,6 +836,7 @@ def get_knowledge_gaps_tool( def get_surprising_connections_tool( top_n: int = 15, repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Find unexpected architectural coupling via composite surprise scoring. @@ -780,15 +847,17 @@ def get_surprising_connections_tool( Args: top_n: Number of top surprises to return. Default: 15. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" for full edge data, "minimal" for source/target/score/reasons. """ return get_surprising_connections_func( - repo_root=_resolve_repo_root(repo_root), top_n=top_n, + repo_root=_resolve_repo_root(repo_root), top_n=top_n, detail_level=detail_level, ) @mcp.tool() def get_suggested_questions_tool( repo_root: Optional[str] = None, + detail_level: str = "standard", ) -> dict: """Auto-generate review questions from graph analysis. @@ -798,9 +867,10 @@ def get_suggested_questions_tool( Args: repo_root: Repository root path. Auto-detected if omitted. 
+ detail_level: "standard" for full question data, "minimal" for top 5 text + counts. """ return get_suggested_questions_func( - repo_root=_resolve_repo_root(repo_root), + repo_root=_resolve_repo_root(repo_root), detail_level=detail_level, ) @@ -835,13 +905,16 @@ def traverse_graph_tool( @mcp.tool() -def list_repos_tool() -> dict: +def list_repos_tool(detail_level: str = "standard") -> dict: """List all registered repositories in the multi-repo registry. Returns the list of repos registered at ~/.code-review-graph/registry.json. Use the CLI 'register' command to add repos. + + Args: + detail_level: "standard" for full metadata, "minimal" for alias and path only. """ - return list_repos_func() + return list_repos_func(detail_level=detail_level) @mcp.tool() @@ -849,6 +922,7 @@ def cross_repo_search_tool( query: str, kind: Optional[str] = None, limit: int = 20, + detail_level: str = "standard", ) -> dict: """Search for code entities across all registered repositories. @@ -859,8 +933,9 @@ def cross_repo_search_tool( query: Search string to match against node names. kind: Optional filter: File, Class, Function, Type, or Test. limit: Maximum results per repo. Default: 20. + detail_level: "standard" for full node data, "minimal" for name/kind/repo/file only. """ - return cross_repo_search_func(query=query, kind=kind, limit=limit) + return cross_repo_search_func(query=query, kind=kind, limit=limit, detail_level=detail_level) @mcp.prompt() diff --git a/code_review_graph/tools/__init__.py b/code_review_graph/tools/__init__.py index 385e289c..804cbd72 100644 --- a/code_review_graph/tools/__init__.py +++ b/code_review_graph/tools/__init__.py @@ -1,6 +1,6 @@ """MCP tool definitions for the Code Review Graph server. -Exposes 27 tools: +Exposes 29 tools: 1. build_or_update_graph - full or incremental build 2. get_impact_radius - blast radius from changed files 3. query_graph - predefined graph queries @@ -29,6 +29,7 @@ 26. 
get_surprising_connections - find unexpected architectural coupling 27. get_suggested_questions - auto-generated review questions from graph analysis 28. traverse_graph - BFS/DFS traversal from best-matching node +29. get_review_bundle - single-round-trip complete review package """ from __future__ import annotations @@ -99,6 +100,7 @@ from .review import ( detect_changes_func, get_affected_flows_func, + get_review_bundle, get_review_context, ) @@ -140,6 +142,7 @@ # review "detect_changes_func", "get_affected_flows_func", + "get_review_bundle", "get_review_context", # analysis_tools "get_bridge_nodes_func", diff --git a/code_review_graph/tools/analysis_tools.py b/code_review_graph/tools/analysis_tools.py index 96a70edb..67f28256 100644 --- a/code_review_graph/tools/analysis_tools.py +++ b/code_review_graph/tools/analysis_tools.py @@ -17,6 +17,7 @@ def get_hub_nodes_func( repo_root: str | None = None, top_n: int = 10, + detail_level: str = "standard", ) -> dict[str, Any]: """Find the most connected nodes in the codebase graph. @@ -27,23 +28,41 @@ def get_hub_nodes_func( Args: repo_root: Repository root (auto-detected if omitted). top_n: Number of top hubs to return (default 10). + detail_level: "standard" returns full hub data; "minimal" + returns only name, kind, and degree per hub. 
""" store, _root = _get_store(repo_root or None) hubs = find_hub_nodes(store, top_n=top_n) + suggestions = [ + "get_impact_radius -- check blast radius of a hub", + "query_graph callers_of -- see what calls a hub", + "get_bridge_nodes -- find architectural chokepoints", + ] + if detail_level == "minimal": + minimal_hubs = [ + {k: h[k] for k in ("name", "kind", "in_degree", "out_degree", "total_degree") if k in h} + for h in hubs + ] + return { + "status": "ok", + "summary": f"Found {len(hubs)} hub node(s)", + "hub_nodes": minimal_hubs, + "count": len(hubs), + "next_tool_suggestions": suggestions, + } return { + "status": "ok", + "summary": f"Found {len(hubs)} hub node(s)", "hub_nodes": hubs, "count": len(hubs), - "next_tool_suggestions": [ - "get_impact_radius -- check blast radius of a hub", - "query_graph callers_of -- see what calls a hub", - "get_bridge_nodes -- find architectural chokepoints", - ], + "next_tool_suggestions": suggestions, } def get_bridge_nodes_func( repo_root: str | None = None, top_n: int = 10, + detail_level: str = "standard", ) -> dict[str, Any]: """Find architectural chokepoints via betweenness centrality. @@ -54,22 +73,40 @@ def get_bridge_nodes_func( Args: repo_root: Repository root (auto-detected if omitted). top_n: Number of top bridges to return (default 10). + detail_level: "standard" returns full bridge data; "minimal" + returns only name, kind, and centrality per bridge. 
""" store, _root = _get_store(repo_root or None) bridges = find_bridge_nodes(store, top_n=top_n) + suggestions = [ + "get_hub_nodes -- find most connected nodes", + "get_impact_radius -- check blast radius", + "detect_changes -- see if bridges are affected", + ] + if detail_level == "minimal": + minimal_bridges = [ + {k: b[k] for k in ("name", "kind", "betweenness") if k in b} + for b in bridges + ] + return { + "status": "ok", + "summary": f"Found {len(bridges)} bridge node(s)", + "bridge_nodes": minimal_bridges, + "count": len(bridges), + "next_tool_suggestions": suggestions, + } return { + "status": "ok", + "summary": f"Found {len(bridges)} bridge node(s)", "bridge_nodes": bridges, "count": len(bridges), - "next_tool_suggestions": [ - "get_hub_nodes -- find most connected nodes", - "get_impact_radius -- check blast radius", - "detect_changes -- see if bridges are affected", - ], + "next_tool_suggestions": suggestions, } def get_knowledge_gaps_func( repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Identify structural weaknesses in the codebase. @@ -79,36 +116,45 @@ def get_knowledge_gaps_func( Args: repo_root: Repository root (auto-detected if omitted). + detail_level: "standard" returns full gap details; "minimal" + returns only the count summary per category. 
""" store, _root = _get_store(repo_root or None) gaps = find_knowledge_gaps(store) total = sum(len(v) for v in gaps.values()) + count_summary = { + "isolated_nodes": len(gaps["isolated_nodes"]), + "thin_communities": len(gaps["thin_communities"]), + "untested_hotspots": len(gaps["untested_hotspots"]), + "single_file_communities": len(gaps["single_file_communities"]), + } + suggestions = [ + "refactor dead_code -- find unused symbols", + "get_hub_nodes -- find high-impact nodes", + "get_suggested_questions -- review prompts", + ] + if detail_level == "minimal": + return { + "status": "ok", + "summary": f"{total} gap(s) across {len(gaps)} categories", + "total_gaps": total, + "summary_by_category": count_summary, + "next_tool_suggestions": suggestions, + } return { + "status": "ok", + "summary": f"{total} gap(s) across {len(gaps)} categories", "gaps": gaps, "total_gaps": total, - "summary": { - "isolated_nodes": len(gaps["isolated_nodes"]), - "thin_communities": len( - gaps["thin_communities"] - ), - "untested_hotspots": len( - gaps["untested_hotspots"] - ), - "single_file_communities": len( - gaps["single_file_communities"] - ), - }, - "next_tool_suggestions": [ - "refactor dead_code -- find unused symbols", - "get_hub_nodes -- find high-impact nodes", - "get_suggested_questions -- review prompts", - ], + "summary_by_category": count_summary, + "next_tool_suggestions": suggestions, } def get_surprising_connections_func( repo_root: str | None = None, top_n: int = 15, + detail_level: str = "standard", ) -> dict[str, Any]: """Find unexpected architectural coupling in the codebase. @@ -118,24 +164,40 @@ def get_surprising_connections_func( Args: repo_root: Repository root (auto-detected if omitted). top_n: Number of top surprises to return (default 15). + detail_level: "standard" returns full edge data; "minimal" + returns only source, target, and surprise_score per connection. 
""" store, _root = _get_store(repo_root or None) - surprises = find_surprising_connections( - store, top_n=top_n - ) + surprises = find_surprising_connections(store, top_n=top_n) + suggestions = [ + "get_architecture_overview -- community structure", + "query_graph callers_of -- trace the coupling", + "get_bridge_nodes -- find chokepoints", + ] + if detail_level == "minimal": + minimal = [ + {k: s[k] for k in ("source", "target", "surprise_score", "reasons") if k in s} + for s in surprises + ] + return { + "status": "ok", + "summary": f"Found {len(surprises)} surprising connection(s)", + "surprising_connections": minimal, + "count": len(surprises), + "next_tool_suggestions": suggestions, + } return { + "status": "ok", + "summary": f"Found {len(surprises)} surprising connection(s)", "surprising_connections": surprises, "count": len(surprises), - "next_tool_suggestions": [ - "get_architecture_overview -- community structure", - "query_graph callers_of -- trace the coupling", - "get_bridge_nodes -- find chokepoints", - ], + "next_tool_suggestions": suggestions, } def get_suggested_questions_func( repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Auto-generate review questions from graph analysis. @@ -145,6 +207,8 @@ def get_suggested_questions_func( Args: repo_root: Repository root (auto-detected if omitted). + detail_level: "standard" returns full question data; "minimal" + returns only question text for the top 5, grouped by priority count. 
""" store, _root = _get_store(repo_root or None) questions = generate_suggested_questions(store) @@ -155,15 +219,25 @@ def get_suggested_questions_func( prio = q.get("priority", "medium") if prio in by_priority: by_priority[prio].append(q) + suggestions = [ + "get_knowledge_gaps -- structural weaknesses", + "detect_changes -- risk-scored review", + "get_architecture_overview -- community map", + ] + if detail_level == "minimal": + return { + "status": "ok", + "summary": f"{len(questions)} question(s) generated", + "count": len(questions), + "by_priority": {k: len(v) for k, v in by_priority.items()}, + "top_questions": [q.get("question", "") for q in questions[:5]], + "next_tool_suggestions": suggestions, + } return { + "status": "ok", + "summary": f"{len(questions)} question(s) generated", "questions": questions, "count": len(questions), - "by_priority": { - k: len(v) for k, v in by_priority.items() - }, - "next_tool_suggestions": [ - "get_knowledge_gaps -- structural weaknesses", - "detect_changes -- risk-scored review", - "get_architecture_overview -- community map", - ], + "by_priority": {k: len(v) for k, v in by_priority.items()}, + "next_tool_suggestions": suggestions, } diff --git a/code_review_graph/tools/community_tools.py b/code_review_graph/tools/community_tools.py index d4309fb4..4c1ef487 100644 --- a/code_review_graph/tools/community_tools.py +++ b/code_review_graph/tools/community_tools.py @@ -73,6 +73,7 @@ def get_community_func( community_id: int | None = None, include_members: bool = False, repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Get details of a single code community. @@ -85,6 +86,8 @@ def get_community_func( community_id: Database ID of the community. include_members: If True, include full member node details. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns full community metadata; "minimal" + returns name, size, cohesion, and dominant_language only. 
Returns: Community details, or not_found status. @@ -120,13 +123,31 @@ def get_community_func( members = [node_to_dict(n) for n in member_nodes] community["member_details"] = members + summary_str = ( + f"Community '{community['name']}': " + f"{community['size']} nodes, " + f"cohesion {community['cohesion']:.4f}" + ) + + if detail_level == "minimal": + minimal_community = { + k: community[k] + for k in ("name", "size", "cohesion", "dominant_language") + if k in community + } + result = { + "status": "ok", + "summary": summary_str, + "community": minimal_community, + } + result["_hints"] = generate_hints( + "get_community", result, get_session() + ) + return result + result = { "status": "ok", - "summary": ( - f"Community '{community['name']}': " - f"{community['size']} nodes, " - f"cohesion {community['cohesion']:.4f}" - ), + "summary": summary_str, "community": community, } result["_hints"] = generate_hints( @@ -146,6 +167,7 @@ def get_community_func( def get_architecture_overview_func( repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Generate an architecture overview based on community structure. @@ -155,6 +177,9 @@ def get_architecture_overview_func( Args: repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns full overview with all communities + and cross-community edges; "minimal" returns community names, + counts, and warnings only (no edge details). 
Returns: Architecture overview with communities, cross-community edges, @@ -166,13 +191,30 @@ def get_architecture_overview_func( n_communities = len(overview["communities"]) n_cross = len(overview["cross_community_edges"]) n_warnings = len(overview["warnings"]) + summary_str = ( + f"Architecture: {n_communities} communities, " + f"{n_cross} cross-community edges, " + f"{n_warnings} warning(s)" + ) + if detail_level == "minimal": + minimal_communities = [ + {"name": c["name"], "size": c.get("size", 0)} + for c in overview["communities"] + ] + result: dict[str, Any] = { + "status": "ok", + "summary": summary_str, + "communities": minimal_communities, + "cross_community_edge_count": n_cross, + "warnings": overview["warnings"], + } + result["_hints"] = generate_hints( + "get_architecture_overview", result, get_session() + ) + return result result = { "status": "ok", - "summary": ( - f"Architecture: {n_communities} communities, " - f"{n_cross} cross-community edges, " - f"{n_warnings} warning(s)" - ), + "summary": summary_str, **overview, } result["_hints"] = generate_hints( diff --git a/code_review_graph/tools/query.py b/code_review_graph/tools/query.py index e3d8c3e1..30e7209b 100644 --- a/code_review_graph/tools/query.py +++ b/code_review_graph/tools/query.py @@ -430,11 +430,17 @@ def semantic_search_nodes( # --------------------------------------------------------------------------- -def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]: +def list_graph_stats( + repo_root: str | None = None, + detail_level: str = "standard", +) -> dict[str, Any]: """Get aggregate statistics about the knowledge graph. Args: repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns full breakdown by kind; "minimal" + returns only total_nodes, total_edges, files_count, and + last_updated. Returns: Total nodes, edges, breakdown by kind, languages, and last update time. 
@@ -443,6 +449,28 @@ def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]: try: stats = store.get_stats() + emb_count = 0 + emb_store = EmbeddingStore(get_db_path(root)) + try: + emb_count = emb_store.count() + finally: + emb_store.close() + + if detail_level == "minimal": + summary = ( + f"{stats.total_nodes} nodes, {stats.total_edges} edges" + f" across {stats.files_count} files" + + (f" ({', '.join(stats.languages[:3])})" if stats.languages else "") + ) + return { + "status": "ok", + "summary": summary, + "total_nodes": stats.total_nodes, + "total_edges": stats.total_edges, + "files_count": stats.files_count, + "last_updated": stats.last_updated, + } + summary_parts = [ f"Graph statistics for {root.name}:", f" Files: {stats.files_count}", @@ -459,19 +487,8 @@ def list_graph_stats(repo_root: str | None = None) -> dict[str, Any]: summary_parts.append("Edges by kind:") for kind, count in sorted(stats.edges_by_kind.items()): summary_parts.append(f" {kind}: {count}") - - # Add embedding info if available - emb_store = EmbeddingStore(get_db_path(root)) - try: - emb_count = emb_store.count() - summary_parts.append("") - summary_parts.append(f"Embeddings: {emb_count} nodes embedded") - if not emb_store.available: - summary_parts.append( - " (install sentence-transformers for semantic search)" - ) - finally: - emb_store.close() + summary_parts.append("") + summary_parts.append(f"Embeddings: {emb_count} nodes embedded") return { "status": "ok", @@ -500,6 +517,7 @@ def find_large_functions( file_path_pattern: str | None = None, limit: int = 50, repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Find functions, classes, or files exceeding a line-count threshold. @@ -512,6 +530,8 @@ def find_large_functions( file_path_pattern: Filter by file path substring (e.g. "components/"). limit: Maximum results (default: 50). repo_root: Repository root path. Auto-detected if omitted. 
+ detail_level: "standard" returns full node data; "minimal" returns + only name, kind, line_count, and relative_path per result. Returns: Oversized nodes with line counts, ordered largest first. @@ -533,7 +553,6 @@ def find_large_functions( if n.line_start and n.line_end else 0 ) - # Make file_path relative for readability try: d["relative_path"] = str(Path(n.file_path).relative_to(root)) except ValueError: @@ -554,6 +573,19 @@ def find_large_functions( if len(results) > 10: summary_parts.append(f" ... and {len(results) - 10} more") + if detail_level == "minimal": + minimal = [ + {k: r[k] for k in ("name", "kind", "line_count", "relative_path") if k in r} + for r in results + ] + return { + "status": "ok", + "summary": "\n".join(summary_parts), + "total_found": len(results), + "min_lines": min_lines, + "results": minimal, + } + return { "status": "ok", "summary": "\n".join(summary_parts), diff --git a/code_review_graph/tools/refactor_tools.py b/code_review_graph/tools/refactor_tools.py index c4ccf5ef..f2023910 100644 --- a/code_review_graph/tools/refactor_tools.py +++ b/code_review_graph/tools/refactor_tools.py @@ -27,6 +27,7 @@ def refactor_func( kind: str | None = None, file_pattern: str | None = None, repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Unified refactoring entry point. @@ -43,6 +44,8 @@ def refactor_func( kind: (dead_code mode) Optional node kind filter. file_pattern: (dead_code mode) Optional file path substring filter. repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns full data; "minimal" caps dead_code + and suggest lists to 10 items (name/kind/file_path or name/kind/suggestion). Returns: Mode-specific results dict. 
@@ -92,12 +95,26 @@ def refactor_func( dead = find_dead_code( store, kind=kind, file_pattern=file_pattern ) - result = { - "status": "ok", - "summary": f"Found {len(dead)} dead code symbol(s).", - "dead_code": dead, - "total": len(dead), - } + total = len(dead) + if detail_level == "minimal": + dead = [ + {k: d[k] for k in ("name", "kind", "file_path") if k in d} + for d in dead[:10] + ] + result = { + "status": "ok", + "summary": f"Found {total} dead code symbol(s) (showing top 10).", + "dead_code": dead, + "total": total, + "omitted": max(0, total - 10), + } + else: + result = { + "status": "ok", + "summary": f"Found {total} dead code symbol(s).", + "dead_code": dead, + "total": total, + } result["_hints"] = generate_hints( "refactor", result, get_session() ) @@ -105,15 +122,26 @@ def refactor_func( else: # suggest suggestions = suggest_refactorings(store) - result = { - "status": "ok", - "summary": ( - f"Generated {len(suggestions)} " - "refactoring suggestion(s)." - ), - "suggestions": suggestions, - "total": len(suggestions), - } + total = len(suggestions) + if detail_level == "minimal": + suggestions = [ + {k: s[k] for k in ("name", "kind", "suggestion") if k in s} + for s in suggestions[:10] + ] + result = { + "status": "ok", + "summary": f"Generated {total} refactoring suggestion(s) (showing top 10).", + "suggestions": suggestions, + "total": total, + "omitted": max(0, total - 10), + } + else: + result = { + "status": "ok", + "summary": f"Generated {total} refactoring suggestion(s).", + "suggestions": suggestions, + "total": total, + } result["_hints"] = generate_hints( "refactor", result, get_session() ) diff --git a/code_review_graph/tools/registry_tools.py b/code_review_graph/tools/registry_tools.py index 698cf23e..019afb3e 100644 --- a/code_review_graph/tools/registry_tools.py +++ b/code_review_graph/tools/registry_tools.py @@ -18,12 +18,16 @@ # --------------------------------------------------------------------------- -def list_repos_func() -> 
dict[str, Any]: +def list_repos_func(detail_level: str = "standard") -> dict[str, Any]: """List all registered repositories. [REGISTRY] Returns the list of repositories registered in the global multi-repo registry at ``~/.code-review-graph/registry.json``. + Args: + detail_level: "standard" returns full repo metadata; "minimal" + returns only alias and path per repo. + Returns: List of registered repos with paths and aliases. """ @@ -32,6 +36,11 @@ def list_repos_func() -> dict[str, Any]: try: registry = Registry() repos = registry.list_repos() + if detail_level == "minimal": + repos = [ + {k: r[k] for k in ("alias", "path") if k in r} + for r in repos + ] return { "status": "ok", "summary": f"{len(repos)} registered repository(ies)", @@ -50,6 +59,7 @@ def cross_repo_search_func( query: str, kind: str | None = None, limit: int = 20, + detail_level: str = "standard", ) -> dict[str, Any]: """Search across all registered repositories. @@ -60,6 +70,8 @@ def cross_repo_search_func( query: Search query string. kind: Optional node kind filter (e.g. "Function", "Class"). limit: Maximum results per repo (default: 20). + detail_level: "standard" returns full node data; "minimal" returns + only name, kind, repo, and file_path per result. Returns: Combined search results from all registered repos. 
@@ -112,13 +124,20 @@ def cross_repo_search_func( key=lambda r: r.get("score", 0), reverse=True ) + trimmed = all_results[:limit] + if detail_level == "minimal": + trimmed = [ + {k: r[k] for k in ("name", "kind", "repo", "file_path") if k in r} + for r in trimmed + ] + return { "status": "ok", "summary": ( f"Found {len(all_results)} result(s) across " f"{len(searched_repos)} repo(s) for '{query}'" ), - "results": all_results[:limit], + "results": trimmed, "repos_searched": searched_repos, } except Exception as exc: diff --git a/code_review_graph/tools/review.py b/code_review_graph/tools/review.py index fcf205ff..819dc475 100644 --- a/code_review_graph/tools/review.py +++ b/code_review_graph/tools/review.py @@ -8,6 +8,7 @@ from ..changes import analyze_changes, parse_diff_ranges, parse_git_diff_ranges # noqa: F401 from ..flows import get_affected_flows as _get_affected_flows +from ..flows import summarize_flow from ..graph import edge_to_dict, node_to_dict from ..hints import generate_hints, get_session from ..incremental import get_changed_files, get_staged_and_unstaged @@ -286,6 +287,7 @@ def get_affected_flows_func( changed_files: list[str] | None = None, base: str = "HEAD~1", repo_root: str | None = None, + detail_level: str = "standard", ) -> dict[str, Any]: """Find execution flows affected by changed files. @@ -298,6 +300,9 @@ def get_affected_flows_func( Auto-detected from git diff if omitted. base: Git ref for auto-detecting changes (default: HEAD~1). repo_root: Repository root path. Auto-detected if omitted. + detail_level: "standard" returns full flow step data; "minimal" returns + a narrative summary paragraph and bullet IDs per flow, with the raw + step list moved to a ``detail`` key that agents can ignore. Returns: Affected flows sorted by criticality, with step details. 
@@ -317,21 +322,42 @@ def get_affected_flows_func( "total": 0, } - # Convert to absolute paths for graph lookup abs_files = [str(root / f) for f in changed_files] result = _get_affected_flows(store, abs_files) total = result["total"] - out = { - "status": "ok", - "summary": ( - f"{total} flow(s) affected by changes " - f"in {len(changed_files)} file(s)" - ), - "changed_files": changed_files, - "affected_flows": result["affected_flows"], - "total": total, - } + flows = result["affected_flows"] + + if detail_level == "minimal": + summaries = [summarize_flow(f) for f in flows] + high = sum(1 for s in summaries if s["criticality"] >= 0.7) + medium = sum(1 for s in summaries if 0.4 <= s["criticality"] < 0.7) + low = total - high - medium + + narrative = ( + f"{total} flow(s) affected by changes in {len(changed_files)} file(s): " + f"{high} high-criticality, {medium} medium, {low} low." + ) + out: dict[str, Any] = { + "status": "ok", + "summary": narrative, + "changed_files": changed_files, + "total": total, + "affected_flows": summaries, + "detail": flows, + } + else: + out = { + "status": "ok", + "summary": ( + f"{total} flow(s) affected by changes " + f"in {len(changed_files)} file(s)" + ), + "changed_files": changed_files, + "affected_flows": flows, + "total": total, + } + out["_hints"] = generate_hints( "get_affected_flows", out, get_session() ) @@ -466,3 +492,194 @@ def detect_changes_func( return {"status": "error", "error": str(exc)} finally: store.close() + + +# --------------------------------------------------------------------------- +# Tool (new): get_review_bundle [REVIEW] +# --------------------------------------------------------------------------- + + +def get_review_bundle( + base: str = "HEAD~1", + changed_files: list[str] | None = None, + max_nodes: int = 20, + include_snippets: bool = True, + repo_root: str | None = None, +) -> dict[str, Any]: + """Return a complete review package in a single round-trip. 
+ + [REVIEW] Collapses the typical 2-4 tool call review workflow into one + response by combining: risk-scored change analysis, one-hop caller/callee + subgraph for changed symbols, affected flow summaries, and a structured + review checklist. + + Args: + base: Git ref to diff against (default: HEAD~1). + changed_files: Explicit list of changed file paths (relative to repo + root). Auto-detected from git diff if omitted. + max_nodes: Maximum neighbour nodes to include in the caller/callee + subgraph (default: 20). Higher values give more context at the + cost of tokens. + include_snippets: If True, include source snippets for changed + functions (default: True). + repo_root: Repository root path. Auto-detected if omitted. + + Returns: + Bundle containing: + - ``risk_score`` / ``risk`` (high/medium/low) + - ``changed_functions`` list (with optional source snippets) + - ``subgraph`` — one-hop caller and callee names for each changed symbol + - ``affected_flows`` — compact flow summaries + - ``test_gaps`` — list of untested changed functions + - ``review_checklist`` — structured list of actionable items + - ``review_priorities`` — top 5 priority items + """ + store, root = _get_store(repo_root) + try: + if changed_files is None: + changed_files = get_changed_files(root, base) + if not changed_files: + changed_files = get_staged_and_unstaged(root) + + if not changed_files: + return { + "status": "ok", + "summary": "No changed files detected. Nothing to review.", + "risk_score": 0.0, + "risk": "none", + "changed_functions": [], + "subgraph": {}, + "affected_flows": [], + "test_gaps": [], + "review_checklist": [], + "review_priorities": [], + } + + abs_files = [str(root / f) for f in changed_files] + + # 1. 
Risk-scored change analysis + diff_ranges = parse_diff_ranges(str(root), base) + abs_ranges: dict[str, list[tuple[int, int]]] = { + str(root / rel): ranges for rel, ranges in diff_ranges.items() + } + analysis = analyze_changes( + store, + changed_files=abs_files, + changed_ranges=abs_ranges if abs_ranges else None, + repo_root=str(root), + base=base, + ) + + risk_score: float = analysis.get("risk_score", 0.0) + risk = ( + "high" if risk_score >= 0.7 + else "medium" if risk_score >= 0.4 + else "low" + ) + + changed_funcs: list[dict] = analysis.get("changed_functions", []) + + # 2. Source snippets for changed functions + if include_snippets: + for func in changed_funcs: + fp = func.get("file_path") + ls = func.get("line_start") + le = func.get("line_end") + if fp and ls and le: + file_path = Path(fp) + if file_path.is_file(): + try: + lines = file_path.read_text(errors="replace").splitlines() + start = max(0, ls - 1) + end = min(len(lines), le) + func["source"] = "\n".join( + f"{i + 1}: {lines[i]}" for i in range(start, end) + ) + except (OSError, UnicodeDecodeError): + func["source"] = "(could not read file)" + + # 3. One-hop caller/callee subgraph for changed symbols + subgraph: dict[str, dict[str, list[str]]] = {} + nodes_added = 0 + for func in changed_funcs: + if nodes_added >= max_nodes: + break + qn: str | None = func.get("qualified_name") + if not qn: + continue + callers: list[str] = [] + callees: list[str] = [] + for e in store.get_edges_by_target(qn): + if e.kind == "CALLS": + callers.append(e.source_qualified) + nodes_added += 1 + if nodes_added >= max_nodes: + break + for e in store.get_edges_by_source(qn): + if e.kind == "CALLS": + callees.append(e.target_qualified) + nodes_added += 1 + if nodes_added >= max_nodes: + break + if callers or callees: + subgraph[qn] = {"callers": callers, "callees": callees} + + # 4. 
Affected flow summaries + flow_result = _get_affected_flows(store, abs_files) + flow_summaries = [summarize_flow(f) for f in flow_result["affected_flows"]] + + # 5. Test gaps + test_gaps: list[dict] = analysis.get("test_gaps", []) + + # 6. Review checklist + checklist: list[str] = [] + if test_gaps: + checklist.append( + f"[ ] Add tests for {len(test_gaps)} untested changed function(s): " + + ", ".join(g.get("name", "") for g in test_gaps[:3]) + + ("..." if len(test_gaps) > 3 else "") + ) + if risk == "high": + checklist.append( + "[ ] High blast radius — verify all callers listed in subgraph " + "handle the new behaviour correctly." + ) + if flow_summaries: + high_flows = [s for s in flow_summaries if s["criticality"] >= 0.7] + if high_flows: + checklist.append( + f"[ ] {len(high_flows)} high-criticality flow(s) affected — " + "perform end-to-end testing: " + + ", ".join(s["name"] for s in high_flows[:3]) + ) + if not checklist: + checklist.append("[ ] Changes appear well-contained — standard review applies.") + + priorities = analysis.get("review_priorities", [])[:5] + + summary = ( + f"Review bundle: {len(changed_files)} file(s) changed, " + f"risk={risk} ({risk_score:.2f}), " + f"{len(flow_summaries)} flow(s) affected, " + f"{len(test_gaps)} test gap(s)." + ) + + result: dict[str, Any] = { + "status": "ok", + "summary": summary, + "risk_score": risk_score, + "risk": risk, + "changed_files": changed_files, + "changed_functions": changed_funcs, + "subgraph": subgraph, + "affected_flows": flow_summaries, + "test_gaps": test_gaps, + "review_checklist": checklist, + "review_priorities": priorities, + } + result["_hints"] = generate_hints("detect_changes", result, get_session()) + return result + except Exception as exc: + return {"status": "error", "error": str(exc)} + finally: + store.close()