diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 45abe7d..1c3bee3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -17,11 +17,8 @@ on: env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" -# GitHub Pages requires these permissions on the deploy job. permissions: contents: read - pages: write - id-token: write # Allow only one concurrent deploy; cancel in-progress runs on a new push. concurrency: @@ -63,6 +60,10 @@ jobs: name: Deploy to GitHub Pages needs: build runs-on: ubuntu-latest + permissions: + contents: read + pages: write + id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} diff --git a/README.md b/README.md index bf4f335..5dac2d9 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.11+](https://img.shields.io/badge/Python-3.11+-green.svg)](https://python.org) [![PyPI](https://img.shields.io/pypi/v/claude-ctx.svg)](https://pypi.org/project/claude-ctx/) -[![Tests](https://img.shields.io/badge/Tests-3843_collected-brightgreen.svg)](#) +[![Tests](https://img.shields.io/badge/Tests-3872_collected-brightgreen.svg)](#) [![Graph](https://img.shields.io/badge/Graph-102%2C928_nodes_/_2.9M_edges-red.svg)](graph/) [![Docs](https://img.shields.io/badge/docs-MkDocs_Material-blue.svg)](https://stevesolun.github.io/ctx/) [![Repo views](https://hits.sh/github.com/stevesolun/ctx.svg?label=repo%20views)](https://hits.sh/github.com/stevesolun/ctx/) diff --git a/docs/index.md b/docs/index.md index e4e1e31..58b9a7b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -204,7 +204,7 @@ ones are flagged. New ones self-ingest. --- **v1.0.9** — MIT, CI-matrixed (Ubuntu 3.12 plus Windows/macOS 3.11/3.12), - 3,843 tests collected. Ships console scripts including `ctx-init`, + 3,872 tests collected. Ships console scripts including `ctx-init`, `ctx-monitor` (local dashboard with graph + wiki + load/unload for skills, agents, and MCP servers, plus Harness Setup for user-owned LLMs), `ctx-incremental-attach`, `ctx-incremental-shadow`, `ctx-dedup-check` diff --git a/scripts/ci_classifier.py b/scripts/ci_classifier.py index 539a27f..576faee 100644 --- a/scripts/ci_classifier.py +++ b/scripts/ci_classifier.py @@ -86,6 +86,12 @@ def _normalize_path(path: str) -> str: return path.strip().lstrip("\ufeff").replace("\\", "/") +def _is_graph_artifact_path(path: str) -> bool: + if _matches(path, GRAPH_ARTIFACT_PATTERNS): + return True + return _matches(path, ("graph/**",)) and path != "graph/README.md" + + def classify_paths(paths: Iterable[str]) -> dict[str, bool]: files = [ normalized @@ -94,9 +100,7 @@ def classify_paths(paths: Iterable[str]) -> dict[str, bool]: ] ci_changed = any(_matches(path, (".github/workflows/**",)) for path in files) docs_changed = any(_matches(path, DOCS_PATTERNS) for path in files) - graph_artifact_changed = any( - _matches(path, GRAPH_ARTIFACT_PATTERNS) for path in files - ) + graph_artifact_changed = any(_is_graph_artifact_path(path) for path in files) graph_only = bool(files) and all(_matches(path, ("graph/**",)) for path in files) return { "browser_changed": ci_changed diff --git a/scripts/ci_preflight.py b/scripts/ci_preflight.py index 11c891e..0854ef5 100644 --- a/scripts/ci_preflight.py +++ b/scripts/ci_preflight.py @@ -37,11 +37,11 @@ "--min-semantic-edges", "1000000", "--expected-nodes", - "102925", + "102928", "--expected-edges", - "2913930", + "2913960", "--expected-semantic-edges", - "1683163", + "1683193", "--expected-harness-nodes", "207", "--expected-skills-sh-nodes", @@ -51,11 +51,11 @@ "--expected-skills-sh-converted", "89465", "--expected-skill-pages", - "91463", + "91464", "--expected-agent-pages", "467", "--expected-mcp-pages", - "10788", + "10790", "--expected-harness-pages", "207", "--line-threshold", diff --git a/scripts/ci_required.py b/scripts/ci_required.py index d15cc7b..b48600d 100644 --- a/scripts/ci_required.py +++ b/scripts/ci_required.py @@ -67,6 +67,10 @@ def failed_required_jobs( event_name == "pull_request" and _job_output(needs, "classify", "graph_only") == "true" ) + graph_changed_pr = ( + event_name == "pull_request" + and _job_output(needs, "classify", "graph_changed") == "true" + ) graph_artifact_changed_pr = ( event_name == "pull_request" and _job_output(needs, "classify", "graph_artifact_changed") == "true" @@ -96,6 +100,7 @@ def failed_required_jobs( and name == "graph-check" and result == "skipped" and not graph_artifact_changed_pr + and not (graph_changed_pr and not docs_only_pr) ): continue if ( diff --git a/scripts/overlay_wiki_entities.py b/scripts/overlay_wiki_entities.py index 6012b80..6ee3a43 100644 --- a/scripts/overlay_wiki_entities.py +++ b/scripts/overlay_wiki_entities.py @@ -10,6 +10,7 @@ import argparse import json +import sys import tarfile import tempfile from collections import Counter @@ -18,9 +19,13 @@ from pathlib import Path from typing import Any -from ctx.core.wiki.artifact_promotion import promote_staged_artifact -from ctx.utils._fs_utils import atomic_write_text, reject_symlink_path -from scripts.build_dashboard_graph_index import build_dashboard_index +REPO_ROOT = Path(__file__).resolve().parent.parent +if str(REPO_ROOT) not in sys.path: + sys.path.insert(0, str(REPO_ROOT)) + +from ctx.core.wiki.artifact_promotion import promote_staged_artifact # noqa: E402 +from ctx.utils._fs_utils import atomic_write_text, reject_symlink_path # noqa: E402 +from scripts.build_dashboard_graph_index import build_dashboard_index # noqa: E402 GRAPH_EXPORT_NAMES = { "graphify-out/graph.json", @@ -215,7 +220,7 @@ def _collect_replacements( entity_type, slug = _split_node_id(node_id) page = _entity_page(source_wiki, entity_type, slug) if page is not None and (not runtime or entity_type == "harness"): - replacements[page.relative_to(source_wiki).as_posix()] = page.read_bytes() + replacements[page.relative_to(source_wiki).as_posix()] = _read_safe_bytes(page) if not runtime and entity_type == "skill": replacements.update(_skill_replacements(source_wiki, slug, skills_root=skills_root)) return replacements @@ -241,12 +246,13 @@ def _entity_page(source_wiki: Path, entity_type: str, slug: str) -> Path | None: ], }.get(entity_type, []) for candidate in candidates: - if candidate.is_file(): + if _is_safe_file(candidate): return candidate if entity_type == "mcp-server": matches = list((source_wiki / "entities" / "mcp-servers").rglob(f"{slug}.md")) - if matches: - return matches[0] + for match in matches: + if _is_safe_file(match): + return match return None @@ -257,16 +263,26 @@ def _skill_replacements(source_wiki: Path, slug: str, *, skills_root: Path | Non root / slug / "SKILL.md", ] for candidate in candidates: - if candidate.is_file(): + if _is_safe_file(candidate): skill_dir = candidate.parent return { - f"converted/{slug}/{path.relative_to(skill_dir).as_posix()}": path.read_bytes() + f"converted/{slug}/{path.relative_to(skill_dir).as_posix()}": _read_safe_bytes(path) for path in sorted(skill_dir.rglob("*")) - if path.is_file() and not path.name.endswith((".original", ".lock")) + if _is_safe_file(path) and not path.name.endswith((".original", ".lock")) } return {} +def _is_safe_file(path: Path) -> bool: + reject_symlink_path(path) + return path.is_file() + + +def _read_safe_bytes(path: Path) -> bytes: + reject_symlink_path(path) + return path.read_bytes() + + def _rewrite_tarball(tarball: Path, replacements: dict[str, bytes]) -> None: reject_symlink_path(tarball) staged = tarball.with_name(f"{tarball.name}.staged") diff --git a/src/ctx/adapters/claude_code/inject_hooks.py b/src/ctx/adapters/claude_code/inject_hooks.py index 58af8ba..c91f809 100644 --- a/src/ctx/adapters/claude_code/inject_hooks.py +++ b/src/ctx/adapters/claude_code/inject_hooks.py @@ -173,10 +173,40 @@ def merge_hooks(existing: dict, new_hooks: dict) -> dict: new_cmd = new_entry.get("command", "") new_hooks_list = new_entry.get("hooks", []) - # Check if any command in this entry already exists - new_cmds = {new_cmd} if new_cmd else {h.get("command", "") for h in new_hooks_list} - if not new_cmds.intersection(existing_commands): + if isinstance(new_hooks_list, list) and new_hooks_list: + missing_hooks = [ + hook for hook in new_hooks_list + if isinstance(hook, dict) + and hook.get("command") + and hook.get("command") not in existing_commands + ] + if not missing_hooks: + continue + matcher = new_entry.get("matcher") + target_entry = next( + ( + entry for entry in existing_list + if isinstance(entry, dict) + and entry.get("matcher") == matcher + and isinstance(entry.get("hooks"), list) + ), + None, + ) + if target_entry is not None: + target_entry["hooks"].extend(missing_hooks) + else: + entry = dict(new_entry) + entry["hooks"] = missing_hooks + existing_list.append(entry) + existing_commands.update( + hook["command"] for hook in missing_hooks + if isinstance(hook.get("command"), str) + ) + continue + + if new_cmd and new_cmd not in existing_commands: existing_list.append(new_entry) + existing_commands.add(new_cmd) return existing diff --git a/src/ctx/adapters/claude_code/install/install_utils.py b/src/ctx/adapters/claude_code/install/install_utils.py index 6e8585a..1e298f5 100644 --- a/src/ctx/adapters/claude_code/install/install_utils.py +++ b/src/ctx/adapters/claude_code/install/install_utils.py @@ -29,12 +29,12 @@ import json import logging -import os import re import shutil from pathlib import Path from typing import Callable, Literal +from ctx_config import cfg from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text from ctx.utils._file_lock import file_lock @@ -42,7 +42,7 @@ EntityType = Literal["skill", "agent", "mcp-server"] -MANIFEST_PATH = Path(os.path.expanduser("~/.claude/skill-manifest.json")) +MANIFEST_PATH = cfg.skill_manifest _FRONTMATTER_HEAD_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL) diff --git a/src/ctx/adapters/claude_code/install/mcp_install.py b/src/ctx/adapters/claude_code/install/mcp_install.py index 36f49b1..f94eef6 100644 --- a/src/ctx/adapters/claude_code/install/mcp_install.py +++ b/src/ctx/adapters/claude_code/install/mcp_install.py @@ -103,6 +103,7 @@ # npx / uvx / bunx intentionally unrestricted — they ARE the # package-launcher pattern MCP servers are expected to use. } +_WINDOWS_EXEC_SUFFIXES = (".exe", ".cmd", ".bat", ".ps1") _SECRET_KEY_MARKERS: tuple[str, ...] = ( "token", @@ -141,7 +142,7 @@ def _rejects_banned_args(tokens: list[str]) -> str | None: for the rest.""" if not tokens: return None - exe = tokens[0] + exe = _normalized_executable(tokens[0]) banned = _BANNED_INTERPRETER_ARGS.get(exe) if banned is None: return None @@ -208,6 +209,42 @@ def _find_inline_secret(obj: object, *, path: str = "") -> str | None: return None +def _normalized_executable(value: str) -> str: + name = Path(value).name.lower() + for suffix in _WINDOWS_EXEC_SUFFIXES: + if name.endswith(suffix): + return name[: -len(suffix)] + return name + + +def _find_inline_secret_arg(tokens: list[str]) -> str | None: + for token in tokens: + assignment = _SECRET_ASSIGNMENT_RE.search(token) + if assignment and not _placeholder_secret_value(assignment.group(2)): + return assignment.group(1) + for pattern in _TOKEN_VALUE_PATTERNS: + if pattern.search(token): + return token + if token.startswith("--") and "=" in token: + key, value = token.split("=", 1) + if _secret_key_like(key) and not _placeholder_secret_value(value): + return key + + for index, token in enumerate(tokens[:-1]): + if not token.startswith("-"): + continue + key = token.lstrip("-").replace("-", "_") + value = tokens[index + 1] + if ( + _secret_key_like(key) + and value + and not value.startswith("-") + and not _placeholder_secret_value(value) + ): + return token + return None + + def _redact_output(text: str) -> str: if not text: return text @@ -469,6 +506,25 @@ def install_mcp( ), ) + command_tokens: list[str] | None = None + if effective_cmd: + try: + command_tokens = _split_install_command(effective_cmd) + except ValueError as exc: + return InstallResult( + slug=slug, status="invalid-cmd", command=effective_cmd, + message=f"could not parse --cmd/install_cmd: {exc}", + ) + inline_secret_arg = _find_inline_secret_arg(command_tokens) + if inline_secret_arg is not None: + return InstallResult( + slug=slug, status="invalid-cmd", command=None, + message=( + f"--cmd/install_cmd argument {inline_secret_arg!r} looks like " + "an inline secret; pass an environment variable reference instead." + ), + ) + card = render_card(fm, slug, command=effective_cmd) print(card) @@ -489,13 +545,7 @@ def install_mcp( rc, stdout, stderr = _run_claude_mcp(["add-json", slug, json_config]) else: assert effective_cmd is not None # narrowed by dry_run branch above - try: - tokens = _split_install_command(effective_cmd) - except ValueError as exc: - return InstallResult( - slug=slug, status="invalid-cmd", command=effective_cmd, - message=f"could not parse --cmd/install_cmd: {exc}", - ) + tokens = command_tokens if command_tokens is not None else _split_install_command(effective_cmd) if not tokens: return InstallResult( slug=slug, status="invalid-cmd", command=effective_cmd, @@ -505,7 +555,8 @@ def install_mcp( # (which is under entity-file control); treat it as untrusted. # Only known MCP-runtime launchers are allowed — if your # server needs a bespoke runtime, add-json is the right path. - if tokens[0] not in _ALLOWED_CMD_EXECS: + executable = _normalized_executable(tokens[0]) + if executable not in _ALLOWED_CMD_EXECS: return InstallResult( slug=slug, status="invalid-cmd", command=effective_cmd, message=( diff --git a/src/ctx/adapters/claude_code/install/skill_unload.py b/src/ctx/adapters/claude_code/install/skill_unload.py index b8865d6..ddc24dc 100644 --- a/src/ctx/adapters/claude_code/install/skill_unload.py +++ b/src/ctx/adapters/claude_code/install/skill_unload.py @@ -22,11 +22,12 @@ from ctx.core.wiki.wiki_utils import validate_skill_name from ctx.utils._file_lock import file_lock from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text +from ctx_config import cfg -CLAUDE_DIR = Path(os.path.expanduser("~/.claude")) -MANIFEST_PATH = CLAUDE_DIR / "skill-manifest.json" +CLAUDE_DIR = cfg.claude_dir +MANIFEST_PATH = cfg.skill_manifest PENDING_UNLOAD = CLAUDE_DIR / "pending-unload.json" -WIKI_DIR = CLAUDE_DIR / "skill-wiki" +WIKI_DIR = cfg.wiki_dir SKILL_ENTITIES = WIKI_DIR / "entities" / "skills" AGENT_ENTITIES = WIKI_DIR / "entities" / "agents" diff --git a/src/ctx/adapters/claude_code/skill_health.py b/src/ctx/adapters/claude_code/skill_health.py index 42b41cc..f149f4d 100644 --- a/src/ctx/adapters/claude_code/skill_health.py +++ b/src/ctx/adapters/claude_code/skill_health.py @@ -32,7 +32,6 @@ import argparse import json -import os import sys import time from dataclasses import asdict, dataclass, field @@ -47,10 +46,10 @@ # ── Paths & config defaults ──────────────────────────────────────────────── -SKILLS_DIR = Path(os.path.expanduser("~/.claude/skills")) -AGENTS_DIR = Path(os.path.expanduser("~/.claude/agents")) -MANIFEST_PATH = Path(os.path.expanduser("~/.claude/skill-manifest.json")) -PENDING_PATH = Path(os.path.expanduser("~/.claude/pending-skills.json")) +SKILLS_DIR = cfg.skills_dir +AGENTS_DIR = cfg.agents_dir +MANIFEST_PATH = cfg.skill_manifest +PENDING_PATH = cfg.pending_skills DEFAULT_LINE_THRESHOLD = cfg.line_threshold DEFAULT_MIN_BODY_LINES = 5 diff --git a/src/ctx/adapters/claude_code/skill_loader.py b/src/ctx/adapters/claude_code/skill_loader.py index e518507..2a74bf5 100644 --- a/src/ctx/adapters/claude_code/skill_loader.py +++ b/src/ctx/adapters/claude_code/skill_loader.py @@ -16,12 +16,12 @@ import argparse import json import logging -import os import sys import uuid from pathlib import Path from typing import Any +from ctx_config import cfg from ctx.core.wiki.wiki_utils import validate_skill_name from ctx.utils._file_lock import file_lock from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text @@ -32,11 +32,11 @@ _SESSION_ID: str = uuid.uuid4().hex -SKILLS_DIR = Path(os.path.expanduser("~/.claude/skills")) -AGENTS_DIR = Path(os.path.expanduser("~/.claude/agents")) -WIKI_DIR = Path(os.path.expanduser("~/.claude/skill-wiki")) -PENDING_SKILLS = Path(os.path.expanduser("~/.claude/pending-skills.json")) -MANIFEST_PATH = Path(os.path.expanduser("~/.claude/skill-manifest.json")) +SKILLS_DIR = cfg.skills_dir +AGENTS_DIR = cfg.agents_dir +WIKI_DIR = cfg.wiki_dir +PENDING_SKILLS = cfg.pending_skills +MANIFEST_PATH = cfg.skill_manifest def _resolved_under(candidate: Path, base: Path) -> bool: diff --git a/src/ctx/core/graph/entity_overlays.py b/src/ctx/core/graph/entity_overlays.py index b192e15..c5c03a0 100644 --- a/src/ctx/core/graph/entity_overlays.py +++ b/src/ctx/core/graph/entity_overlays.py @@ -124,6 +124,16 @@ def merge_node_attrs(existing: Mapping[str, Any], incoming: Mapping[str, Any]) - return merged +def replace_node_attrs(existing: Mapping[str, Any], incoming: Mapping[str, Any]) -> dict[str, Any]: + """Overlay incoming non-empty node attrs onto an existing node.""" + merged = dict(existing) + for key, value in incoming.items(): + if key == "id" or value in _EMPTY_VALUES: + continue + merged[key] = value + return merged + + def merge_edge_attrs(existing: Mapping[str, Any], incoming: Mapping[str, Any]) -> dict[str, Any]: merged = dict(existing) for key, value in incoming.items(): @@ -140,6 +150,15 @@ def merge_edge_attrs(existing: Mapping[str, Any], incoming: Mapping[str, Any]) - return merged +def replace_edge_attrs(incoming: Mapping[str, Any]) -> dict[str, Any]: + """Return only the current non-empty edge attrs from an authoritative overlay.""" + return { + key: value + for key, value in incoming.items() + if key not in {"source", "target"} and value not in _EMPTY_VALUES + } + + def load_overlay_records(path: Path) -> list[dict[str, Any]]: """Load JSONL overlay records for writer paths. diff --git a/src/ctx/core/graph/resolve_graph.py b/src/ctx/core/graph/resolve_graph.py index 282d6e6..938091b 100644 --- a/src/ctx/core/graph/resolve_graph.py +++ b/src/ctx/core/graph/resolve_graph.py @@ -18,7 +18,9 @@ import os import sys from collections import defaultdict +from collections.abc import Mapping from pathlib import Path +from typing import Any import networkx as nx from networkx.readwrite import node_link_graph @@ -27,6 +29,8 @@ active_overlay_records, merge_edge_attrs, merge_node_attrs, + replace_edge_attrs, + replace_node_attrs, ) logger = logging.getLogger(__name__) @@ -189,6 +193,7 @@ def _apply_entity_overlays(G: nx.Graph, graph_path: Path) -> nx.Graph: records.append(payload) for payload in active_overlay_records(records): + authoritative_nodes = _authoritative_overlay_nodes(payload) nodes = payload.get("nodes", []) if isinstance(nodes, list): for node in nodes: @@ -199,7 +204,11 @@ def _apply_entity_overlays(G: nx.Graph, graph_path: Path) -> nx.Graph: continue attrs = {key: value for key, value in node.items() if key != "id"} if node_id in G: - attrs = merge_node_attrs(G.nodes[node_id], attrs) + attrs = ( + replace_node_attrs(G.nodes[node_id], attrs) + if node_id in authoritative_nodes + else merge_node_attrs(G.nodes[node_id], attrs) + ) G.add_node(node_id, **attrs) applied_nodes += 1 @@ -219,7 +228,12 @@ def _apply_entity_overlays(G: nx.Graph, graph_path: Path) -> nx.Graph: for key, value in edge.items() if key not in {"source", "target"} } - if G.has_edge(source, target): + authoritative_edge = source in authoritative_nodes or target in authoritative_nodes + if authoritative_edge: + attrs = replace_edge_attrs(edge) + if G.has_edge(source, target): + G.remove_edge(source, target) + elif G.has_edge(source, target): attrs = merge_edge_attrs(G.edges[source, target], attrs) G.add_edge(source, target, **attrs) applied_edges += 1 @@ -230,6 +244,33 @@ def _apply_entity_overlays(G: nx.Graph, graph_path: Path) -> nx.Graph: return G +def _authoritative_overlay_nodes(payload: Mapping[str, Any]) -> set[str]: + """Return node IDs whose overlay rows should replace current ANN scores.""" + kind = payload.get("kind") + attach_key = payload.get("attach_key") + replace_scope = payload.get("replace_scope") + is_ann_record = ( + kind == "ann_attach" + or (isinstance(attach_key, str) and attach_key.startswith("ann:v1:")) + or (isinstance(replace_scope, str) and replace_scope.startswith("ann:v1:")) + ) + if not is_ann_record: + return set() + + node_ids: set[str] = set() + node_id = payload.get("node_id") + if isinstance(node_id, str) and node_id: + node_ids.add(node_id) + nodes = payload.get("nodes") + if isinstance(nodes, list): + for node in nodes: + if isinstance(node, dict): + nested_id = node.get("id") + if isinstance(nested_id, str) and nested_id: + node_ids.add(nested_id) + return node_ids + + def load_graph( path: Path | None = None, *, diff --git a/src/ctx/core/graph/semantic_edges.py b/src/ctx/core/graph/semantic_edges.py index 703b709..07d5b3e 100644 --- a/src/ctx/core/graph/semantic_edges.py +++ b/src/ctx/core/graph/semantic_edges.py @@ -210,6 +210,13 @@ def _partition_for_incremental( removed = prior_ids - current_ids overlap = current_ids & prior_ids + if new: + # A new node can displace any existing node's prior top-K even when + # no prior row could reference it. Interactive single-entity adds use + # the ANN overlay attach path; the semantic graph builder must preserve + # parity with a full top-K rebuild. + return current_ids, set() + changed: set[str] = set() for nid in overlap: cached_hash = prior.nodes[nid].get("content_hash", "") @@ -524,6 +531,9 @@ def _topk_pairs( n = vecs.shape[0] out: dict[tuple[str, str], float] = {} + if top_k <= 0 or n <= 1: + return {} + # top_k + 1 because argpartition returns the node itself (cosine=1.0) # as its own nearest neighbor. We mask self below, but asking for # one extra is defensive against ties. @@ -542,17 +552,16 @@ def _topk_pairs( idx_unsorted = np.argpartition(-sims, effective_k - 1, axis=1)[:, :effective_k] for i in range(end - start): src_id = node_ids[start + i] - for j in idx_unsorted[i]: - if j == start + i: - continue - score = float(sims[i, j]) - if score < min_cosine: - continue - dst_id = node_ids[int(j)] - pair = (src_id, dst_id) if src_id < dst_id else (dst_id, src_id) - existing = out.get(pair) - if existing is None or score > existing: - out[pair] = score + _merge_exact_topk_row( + out, + src_id=src_id, + src_index=start + i, + node_ids=node_ids, + scores=sims[i], + candidate_indices=idx_unsorted[i], + top_k=top_k, + min_cosine=min_cosine, + ) if end == n or (start // chunk_size + 1) % 10 == 0: print( "semantic_edges: top-k rows " @@ -848,6 +857,8 @@ def _topk_pairs_subset( if not subset_indices: return {} + if top_k <= 0 or vecs.shape[0] <= 1: + return {} out: dict[tuple[str, str], float] = {} effective_k = min(top_k + 1, vecs.shape[0]) @@ -867,18 +878,16 @@ def _topk_pairs_subset( idx_unsorted = np.argpartition(-sims, effective_k - 1, axis=1)[:, :effective_k] for i, abs_i in enumerate(chunk_indices): src_id = node_ids[abs_i] - for j in idx_unsorted[i]: - j = int(j) - if j == abs_i: - continue - score = float(sims[i, j]) - if score < min_cosine: - continue - dst_id = node_ids[j] - pair = (src_id, dst_id) if src_id < dst_id else (dst_id, src_id) - existing = out.get(pair) - if existing is None or score > existing: - out[pair] = score + _merge_exact_topk_row( + out, + src_id=src_id, + src_index=abs_i, + node_ids=node_ids, + scores=sims[i], + candidate_indices=idx_unsorted[i], + top_k=top_k, + min_cosine=min_cosine, + ) return out @@ -989,6 +998,41 @@ def _merge_neighbor_rows( break +def _merge_exact_topk_row( + out: dict[tuple[str, str], float], + *, + src_id: str, + src_index: int, + node_ids: Sequence[str], + scores: "np.ndarray", + candidate_indices: Sequence[int], + top_k: int, + min_cosine: float, +) -> None: + if top_k <= 0: + return + ordered = sorted( + (int(index) for index in candidate_indices), + key=lambda index: float(scores[index]), + reverse=True, + ) + emitted = 0 + for j in ordered: + if j == src_index: + continue + score = float(scores[j]) + if score < min_cosine: + continue + dst_id = node_ids[j] + pair = (src_id, dst_id) if src_id < dst_id else (dst_id, src_id) + existing = out.get(pair) + if existing is None or score > existing: + out[pair] = score + emitted += 1 + if emitted >= top_k: + break + + def _reuse_prior_pairs( prior: TopKState, unchanged: set[str], diff --git a/src/ctx_init.py b/src/ctx_init.py index aecce43..eee1639 100644 --- a/src/ctx_init.py +++ b/src/ctx_init.py @@ -218,6 +218,14 @@ def _resolve_ctx_src_dir() -> Path: _GRAPH_ARCHIVE_NAME = "wiki-graph.tar.gz" _GRAPH_RUNTIME_ARCHIVE_NAME = "wiki-graph-runtime.tar.gz" _GRAPH_ENTITY_OVERLAY_NAME = "entity-overlays.jsonl" +_GRAPH_ENTITY_OVERLAY_SCORE_FIELDS = ( + "weight", + "final_weight", + "similarity_score", + "semantic_sim", + "tag_sim", + "token_sim", +) _GRAPH_ENTITY_OVERLAY_SHA256 = ( "cc1a69d3452d2018bec1e049fc4ab1fa8f933adecfdcae4802a815be03f8611c" ) @@ -436,7 +444,8 @@ def _validate_graph_entity_overlay(path: Path) -> None: raise ValueError( f"{path} line {lineno} edge {index} must contain source/target" ) - for field in ("weight", "final_weight", "similarity_score"): + numeric_scores: dict[str, float] = {} + for field in _GRAPH_ENTITY_OVERLAY_SCORE_FIELDS: value = edge.get(field) if value is None: continue @@ -444,6 +453,15 @@ def _validate_graph_entity_overlay(path: Path) -> None: raise ValueError( f"{path} line {lineno} edge {index} {field} must be 0..1" ) + numeric_scores[field] = float(value) + if ( + "weight" in numeric_scores + and "final_weight" in numeric_scores + and abs(numeric_scores["weight"] - numeric_scores["final_weight"]) > 1e-9 + ): + raise ValueError( + f"{path} line {lineno} edge {index} weight must equal final_weight" + ) def _release_graph_url(install_mode: str = "runtime") -> str: diff --git a/src/ctx_monitor.py b/src/ctx_monitor.py index 423edd0..313c1c6 100644 --- a/src/ctx_monitor.py +++ b/src/ctx_monitor.py @@ -75,7 +75,7 @@ from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer from pathlib import Path, PurePosixPath from typing import Any -from urllib.parse import quote, unquote +from urllib.parse import quote, unquote, urlsplit from ctx.core.wiki import wiki_queue from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body @@ -112,6 +112,26 @@ def _host_allows_mutations(host: str) -> bool: return False +def _request_host_name(host_header: str) -> str: + value = (host_header or "").strip() + if not value: + return "" + if value.startswith("["): + end = value.find("]") + return value[1:end].rstrip(".").lower() if end != -1 else "" + return value.rsplit(":", 1)[0].rstrip(".").lower() + + +def _origin_host_name(origin: str) -> str: + try: + parsed = urlsplit(origin) + except ValueError: + return "" + if parsed.scheme not in {"http", "https"}: + return "" + return (parsed.hostname or "").rstrip(".").lower() + + def _claude_dir() -> Path: return Path(os.path.expanduser("~/.claude")) @@ -6362,11 +6382,12 @@ def log_message(self, fmt: str, *args: Any) -> None: # require same-origin POSTs plus a per-process token injected into the # served dashboard page. def _same_origin(self) -> bool: + request_host = _request_host_name(self.headers.get("Host", "")) + if not _host_allows_mutations(request_host): + return False origin = self.headers.get("Origin") or "" if origin: - host_header = self.headers.get("Host", "") - expected = f"http://{host_header}" - return origin == expected + return _origin_host_name(origin) == request_host # No Origin header (curl, direct tool calls) is acceptable only # when the mutation token below is also present. return True diff --git a/src/harness_install.py b/src/harness_install.py index 341f7ea..2f64682 100644 --- a/src/harness_install.py +++ b/src/harness_install.py @@ -252,6 +252,16 @@ def _local_source_from_repo_url(repo_url: str) -> Path | None: return candidate if candidate.exists() else None +def _validate_remote_repo_url(repo_url: str) -> None: + parsed = urlparse(repo_url) + if repo_url.startswith("-"): + raise ValueError("remote harness repo_url must not start with '-'") + if parsed.scheme != "https" or not parsed.netloc: + raise ValueError("remote harness repo_url must be an https:// URL") + if parsed.username or parsed.password: + raise ValueError("remote harness repo_url must not include credentials") + + def _reject_symlink_tree(root: Path) -> None: if root.is_symlink(): raise ValueError(f"refusing symlinked harness source: {root}") @@ -265,9 +275,11 @@ def _is_full_commit_sha(value: str | None) -> bool: def _run_git(args: list[str], *, timeout: int = 300) -> subprocess.CompletedProcess[str]: + env = _command_env() + env["GIT_ALLOW_PROTOCOL"] = "https" return subprocess.run( ["git", *args], - env=_command_env(), + env=env, capture_output=True, text=True, check=False, @@ -296,7 +308,7 @@ def _materialize_source( if not allow_local_sources: raise ValueError( "local harness repo_url requires --allow-local-source; " - "cataloged harnesses should normally use https:// repositories" + "cataloged harnesses should normally use https:// repositories" ) local_source = local_source.expanduser().resolve() if not local_source.is_dir(): @@ -305,6 +317,8 @@ def _materialize_source( shutil.copytree(local_source, target) return {"source_type": "local"} + _validate_remote_repo_url(record.repo_url) + if record.repo_ref and not _is_full_commit_sha(record.repo_ref): raise ValueError( "harness repo_ref/commit_sha must be a full commit SHA; " @@ -314,10 +328,10 @@ def _materialize_source( raise ValueError( "remote harness repo_url is not pinned to a commit; add commit_sha " "to the catalog page or pass --allow-mutable-repo-head explicitly" - ) + ) if record.repo_ref: - proc = _run_git(["clone", "--no-checkout", record.repo_url, str(target)]) + proc = _run_git(["clone", "--no-checkout", "--", record.repo_url, str(target)]) if proc.returncode != 0: stderr = proc.stderr.strip() or proc.stdout.strip() raise RuntimeError(f"git clone failed: {stderr}") @@ -337,7 +351,7 @@ def _materialize_source( "resolved_commit": _git_resolved_commit(target) or "", } - proc = _run_git(["clone", "--depth", "1", record.repo_url, str(target)]) + proc = _run_git(["clone", "--depth", "1", "--", record.repo_url, str(target)]) if proc.returncode != 0: stderr = proc.stderr.strip() or proc.stdout.strip() raise RuntimeError(f"git clone failed: {stderr}") diff --git a/src/tests/test_ci_classifier.py b/src/tests/test_ci_classifier.py index 3c9956f..3d9bf0a 100644 --- a/src/tests/test_ci_classifier.py +++ b/src/tests/test_ci_classifier.py @@ -70,6 +70,16 @@ def test_graph_preview_html_is_graph_artifact() -> None: assert flags["graph_only"] is True +def test_unknown_graph_file_is_graph_artifact() -> None: + flags = classify_paths(["graph/notes.json"]) + + assert flags["docs_changed"] is False + assert flags["docs_only"] is False + assert flags["graph_artifact_changed"] is True + assert flags["graph_changed"] is True + assert flags["graph_only"] is True + + def test_graph_readme_is_docs_not_graph_artifact() -> None: flags = classify_paths(["graph/README.md"]) @@ -534,6 +544,25 @@ def test_ci_required_rejects_missing_graph_check_on_graph_only_pr() -> None: } +def test_ci_required_rejects_missing_graph_check_on_unknown_graph_change() -> None: + needs = _required_needs( + classify={ + "result": "success", + "outputs": { + "docs_only": "false", + "graph_artifact_changed": "false", + "graph_changed": "true", + "graph_only": "true", + }, + }, + **{"graph-check": {"result": "skipped"}}, + ) + + assert failed_required_jobs(needs, event_name="pull_request") == { + "graph-check": "skipped", + } + + def test_ci_required_allows_browser_skip_for_unrelated_pr_only() -> None: needs = _required_needs( classify={"result": "success", "outputs": {"browser_changed": "false"}}, diff --git a/src/tests/test_config.py b/src/tests/test_config.py index 2cd82f8..34b3126 100644 --- a/src/tests/test_config.py +++ b/src/tests/test_config.py @@ -162,6 +162,25 @@ def test_custom_tilde_path_expanded(self) -> None: assert cfg.wiki_dir.is_absolute() +def test_claude_adapter_modules_use_cfg_paths() -> None: + from ctx.adapters.claude_code import skill_health, skill_loader + from ctx.adapters.claude_code.install import install_utils, skill_unload + + assert install_utils.MANIFEST_PATH == ctx_config.cfg.skill_manifest + assert skill_loader.SKILLS_DIR == ctx_config.cfg.skills_dir + assert skill_loader.AGENTS_DIR == ctx_config.cfg.agents_dir + assert skill_loader.WIKI_DIR == ctx_config.cfg.wiki_dir + assert skill_loader.PENDING_SKILLS == ctx_config.cfg.pending_skills + assert skill_loader.MANIFEST_PATH == ctx_config.cfg.skill_manifest + assert skill_unload.CLAUDE_DIR == ctx_config.cfg.claude_dir + assert skill_unload.WIKI_DIR == ctx_config.cfg.wiki_dir + assert skill_unload.MANIFEST_PATH == ctx_config.cfg.skill_manifest + assert skill_health.SKILLS_DIR == ctx_config.cfg.skills_dir + assert skill_health.AGENTS_DIR == ctx_config.cfg.agents_dir + assert skill_health.PENDING_PATH == ctx_config.cfg.pending_skills + assert skill_health.MANIFEST_PATH == ctx_config.cfg.skill_manifest + + class TestConfigReload: """test_config_reload -- reload() picks up changes to the raw config.""" diff --git a/src/tests/test_ctx_init.py b/src/tests/test_ctx_init.py index b9cec0f..62d3451 100644 --- a/src/tests/test_ctx_init.py +++ b/src/tests/test_ctx_init.py @@ -571,6 +571,57 @@ def test_graph_install_copies_local_entity_overlay( assert payload["edges"][0]["method"] == "manual_direct_overlay_v1" +@pytest.mark.parametrize("field", ["semantic_sim", "tag_sim", "token_sim"]) +def test_graph_overlay_validation_rejects_out_of_range_similarity_fields( + tmp_path: Path, + field: str, +) -> None: + overlay = tmp_path / "entity-overlays.jsonl" + overlay.write_text( + json.dumps({ + "nodes": [{"id": "skill:a"}], + "edges": [ + { + "source": "skill:a", + "target": "skill:b", + "weight": 0.5, + "final_weight": 0.5, + field: 2.0, + }, + ], + }) + + "\n", + encoding="utf-8", + ) + + with pytest.raises(ValueError, match=f"{field} must be 0..1"): + ci._validate_graph_entity_overlay(overlay) + + +def test_graph_overlay_validation_rejects_weight_final_weight_drift( + tmp_path: Path, +) -> None: + overlay = tmp_path / "entity-overlays.jsonl" + overlay.write_text( + json.dumps({ + "nodes": [{"id": "skill:a"}], + "edges": [ + { + "source": "skill:a", + "target": "skill:b", + "weight": 0.7, + "final_weight": 0.5, + }, + ], + }) + + "\n", + encoding="utf-8", + ) + + with pytest.raises(ValueError, match="weight must equal final_weight"): + ci._validate_graph_entity_overlay(overlay) + + def test_runtime_graph_install_extracts_harness_pages_after_required_files( tmp_path: Path, monkeypatch, diff --git a/src/tests/test_ctx_monitor.py b/src/tests/test_ctx_monitor.py index b03cc81..81d0ae2 100644 --- a/src/tests/test_ctx_monitor.py +++ b/src/tests/test_ctx_monitor.py @@ -780,6 +780,40 @@ def fake_load(slug: str, entity_type: str = "skill") -> tuple[bool, str]: thread.join(timeout=2) +def test_monitor_post_rejects_rebound_host_with_valid_token( + fake_claude: Path, monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: list[str] = [] + + def fake_load(slug: str, entity_type: str = "skill") -> tuple[bool, str]: + calls.append(slug) + return True, f"loaded {entity_type}" + + monkeypatch.setattr(cm, "_perform_load", fake_load) + server, thread, port = _serve_monitor(monkeypatch) + body = json.dumps({"slug": "python-patterns"}).encode("utf-8") + try: + status, payload = _post_raw( + port, + "/api/load", + headers={ + "Host": "evil.example", + "Content-Type": "application/json", + "Content-Length": str(len(body)), + "X-CTX-Monitor-Token": "test-token", + "Origin": "http://evil.example", + }, + body=body, + ) + assert status == 403 + assert "cross-origin" in payload["detail"] + assert calls == [] + finally: + server.shutdown() + server.server_close() + thread.join(timeout=2) + + @pytest.mark.parametrize( ("length", "status", "detail"), [ diff --git a/src/tests/test_harness_install.py b/src/tests/test_harness_install.py index 12e2842..c4dc2ca 100644 --- a/src/tests/test_harness_install.py +++ b/src/tests/test_harness_install.py @@ -258,6 +258,33 @@ def test_remote_install_requires_pinned_commit_by_default(tmp_path: Path) -> Non assert not (tmp_path / "installs" / "text-to-cad").exists() +@pytest.mark.parametrize( + "repo_url", + [ + "ssh://github.com/earthtojake/text-to-cad", + "https://token@example.test/private/repo", + "-c.helper=!calc", + ], +) +def test_remote_install_rejects_unsafe_repo_urls( + tmp_path: Path, + repo_url: str, +) -> None: + wiki = tmp_path / "wiki" + _write_harness_page(wiki, repo_url=repo_url, commit_sha="a" * 40) + + result = harness_install.install_harness( + "text-to-cad", + wiki_path=wiki, + installs_root=tmp_path / "installs", + manifest_dir=tmp_path / "manifests", + ) + + assert result.status == "install-failed" + assert "repo_url" in result.message + assert not (tmp_path / "installs" / "text-to-cad").exists() + + def test_remote_install_fetches_pinned_commit_and_records_manifest( tmp_path: Path, monkeypatch: Any, @@ -288,6 +315,7 @@ def fake_run_git(args: list[str], *, timeout: int = 300) -> _FakeRun: assert result.status == "installed" assert git_calls[0][:2] == ["clone", "--no-checkout"] + assert git_calls[0][2] == "--" assert any( call[0] == "-C" and call[2:6] == ["fetch", "--depth", "1", "origin"] diff --git a/src/tests/test_inject_hooks_security.py b/src/tests/test_inject_hooks_security.py index def3674..ccd728a 100644 --- a/src/tests/test_inject_hooks_security.py +++ b/src/tests/test_inject_hooks_security.py @@ -128,6 +128,29 @@ def test_from_stdin_flag_present_in_posttooluse_commands( f"{cmds_with_stdin}" ) + def test_merge_hooks_repairs_partial_posttooluse_matcher( + self, + tmp_path: Path, + ) -> None: + new_hooks = make_hooks(str(tmp_path / "ctx")) + existing = { + "hooks": { + "PostToolUse": [ + { + "matcher": ".*", + "hooks": [new_hooks["PostToolUse"][0]["hooks"][0]], + }, + ], + }, + } + + merged = merge_hooks(existing, new_hooks) + + commands = _all_commands({"PostToolUse": merged["hooks"]["PostToolUse"]}) + assert any("context_monitor" in command for command in commands) + assert any("skill_add_detector" in command for command in commands) + assert any("bundle_orchestrator" in command for command in commands) + # --------------------------------------------------------------------------- # Fix 2 — Stop array contains both usage_tracker and quality_on_session_end diff --git a/src/tests/test_mcp_install.py b/src/tests/test_mcp_install.py index 14db02f..884bc82 100644 --- a/src/tests/test_mcp_install.py +++ b/src/tests/test_mcp_install.py @@ -395,6 +395,38 @@ def test_windows_command_split_preserves_drive_path_backslashes( "two words", ] + @pytest.mark.parametrize("cmd", ["npx.cmd -y pkg", "python.exe server.py"]) + def test_windows_wrapper_executables_are_allowlisted( + self, + wiki_dir: Path, + fake_claude: dict[str, Any], + isolated_manifest: Path, + cmd: str, + ) -> None: + _write_entity(wiki_dir, "srv", {"status": "cataloged"}) + + r = mcp_install.install_mcp("srv", wiki_dir=wiki_dir, command=cmd, auto=True) + + assert r.status == "installed" + + def test_windows_wrapper_still_rejects_code_execution_args( + self, + wiki_dir: Path, + fake_claude: dict[str, Any], + isolated_manifest: Path, + ) -> None: + _write_entity(wiki_dir, "srv", {"status": "cataloged"}) + + r = mcp_install.install_mcp( + "srv", + wiki_dir=wiki_dir, + command='python.exe -c "print(1)"', + auto=True, + ) + + assert r.status == "invalid-cmd" + assert fake_claude["calls"] == [] + # Strix vuln-0002 regression: even when the first token is allowlisted, # code-execution argument forms must be rejected. A tampered frontmatter # install_cmd could otherwise invoke arbitrary interpreter-controlled @@ -451,6 +483,47 @@ def test_accepts_supported_launcher_patterns( f"legitimate launcher {cmd!r} falsely rejected (msg={r.message})" ) + @pytest.mark.parametrize( + "cmd", + [ + "npx -y pkg GITHUB_TOKEN=ghp_supersecret123456789012345", + "npx -y pkg --api-key sk-supersecret123456789012345", + "npx -y pkg --client-secret=plain-secret-value", + ], + ) + def test_install_cmd_rejects_inline_secret_arguments( + self, + wiki_dir: Path, + fake_claude: dict[str, Any], + isolated_manifest: Path, + cmd: str, + ) -> None: + _write_entity(wiki_dir, "srv", {"status": "cataloged"}) + + r = mcp_install.install_mcp("srv", wiki_dir=wiki_dir, command=cmd, auto=True) + + assert r.status == "invalid-cmd" + assert "inline secret" in r.message + assert fake_claude["calls"] == [] + + def test_install_cmd_allows_secret_env_reference( + self, + wiki_dir: Path, + fake_claude: dict[str, Any], + isolated_manifest: Path, + ) -> None: + _write_entity(wiki_dir, "srv", {"status": "cataloged"}) + + r = mcp_install.install_mcp( + "srv", + wiki_dir=wiki_dir, + command="npx -y pkg --api-key $API_KEY", + auto=True, + ) + + assert r.status == "installed" + assert fake_claude["calls"] + def test_empty_command_tokens_rejected( self, wiki_dir: Path, fake_claude: dict[str, Any], isolated_manifest: Path ) -> None: diff --git a/src/tests/test_overlay_wiki_entities.py b/src/tests/test_overlay_wiki_entities.py index 01e6040..fbc78f2 100644 --- a/src/tests/test_overlay_wiki_entities.py +++ b/src/tests/test_overlay_wiki_entities.py @@ -2,11 +2,17 @@ import json import sqlite3 +import subprocess +import sys import tarfile from datetime import datetime, timezone from pathlib import Path -from scripts.overlay_wiki_entities import overlay_entities +import pytest + +from scripts.overlay_wiki_entities import _entity_page, _skill_replacements, overlay_entities + +ROOT = Path(__file__).resolve().parents[2] def _add_text(tf: tarfile.TarFile, name: str, text: str) -> None: @@ -117,3 +123,50 @@ def test_overlay_entities_preserves_existing_graph_and_adds_selected_pages(tmp_p ("new-skill",), ).fetchone() == ("skill:new-skill",) assert json.loads(root_communities.read_text())["export_id"] == stats.export_id + + +def test_script_direct_invocation_help_works() -> None: + proc = subprocess.run( + [sys.executable, str(ROOT / "scripts" / "overlay_wiki_entities.py"), "--help"], + cwd=str(ROOT), + capture_output=True, + text=True, + check=False, + timeout=30, + ) + + assert proc.returncode == 0 + assert "Overlay explicit local wiki entities" in proc.stdout + + +def test_overlay_rejects_symlinked_entity_page(tmp_path: Path) -> None: + source_wiki = tmp_path / "wiki" + (source_wiki / "entities" / "skills").mkdir(parents=True) + outside = tmp_path / "outside.md" + outside.write_text("secret", encoding="utf-8") + link = source_wiki / "entities" / "skills" / "new-skill.md" + try: + link.symlink_to(outside) + except OSError as exc: + pytest.skip(f"symlinks unavailable in this environment: {exc}") + + with pytest.raises(ValueError, match="symlinked path"): + _entity_page(source_wiki, "skill", "new-skill") + + +def test_overlay_rejects_symlinked_skill_reference(tmp_path: Path) -> None: + source_wiki = tmp_path / "wiki" + skills_root = tmp_path / "skills" + skill_dir = skills_root / "new-skill" + (skill_dir / "references").mkdir(parents=True) + (skill_dir / "SKILL.md").write_text("# body\n", encoding="utf-8") + outside = tmp_path / "outside.md" + outside.write_text("secret", encoding="utf-8") + link = skill_dir / "references" / "leak.md" + try: + link.symlink_to(outside) + except OSError as exc: + pytest.skip(f"symlinks unavailable in this environment: {exc}") + + with pytest.raises(ValueError, match="symlinked path"): + _skill_replacements(source_wiki, "new-skill", skills_root=skills_root) diff --git a/src/tests/test_resolve_graph_queries.py b/src/tests/test_resolve_graph_queries.py index f260867..dcee258 100644 --- a/src/tests/test_resolve_graph_queries.py +++ b/src/tests/test_resolve_graph_queries.py @@ -294,6 +294,54 @@ def test_entity_overlay_upsert_after_tombstone_is_active( assert "skill:deleted" in G assert G.has_edge("skill:deleted", "skill:A") + def test_ann_overlay_replaces_existing_edge_scores_for_updated_node( + self, + tmp_path: Path, + ) -> None: + source = _build_simple_graph() + p = tmp_path / "graph.json" + p.write_text(json.dumps(_serialise_graph(source)), encoding="utf-8") + (tmp_path / "entity-overlays.jsonl").write_text( + json.dumps({ + "kind": "ann_attach", + "attach_key": "ann:v1:model:skill:A:updated", + "replace_scope": "ann:v1:model:skill:A", + "node_id": "skill:A", + "nodes": [ + { + "id": "skill:A", + "type": "skill", + "label": "Updated Alpha", + "content_hash": "updated", + }, + ], + "edges": [ + { + "source": "skill:A", + "target": "skill:B", + "weight": 0.2, + "final_weight": 0.2, + "semantic_sim": 0.8, + "similarity_score": 0.8, + "method": "ann_attach_v1", + }, + ], + }) + + "\n", + encoding="utf-8", + ) + + G = resolve_graph.load_graph(p) + + assert G.nodes["skill:A"]["label"] == "Updated Alpha" + edge = G.edges["skill:A", "skill:B"] + assert edge["weight"] == pytest.approx(0.2) + assert edge["final_weight"] == pytest.approx(0.2) + assert edge["semantic_sim"] == pytest.approx(0.8) + assert edge["similarity_score"] == pytest.approx(0.8) + assert edge["method"] == "ann_attach_v1" + assert "shared_tags" not in edge + def test_entity_overlay_does_not_lower_existing_edge(self, tmp_path: Path) -> None: source = _build_simple_graph() p = tmp_path / "graph.json" diff --git a/src/tests/test_semantic_edges.py b/src/tests/test_semantic_edges.py index 6b40be4..3ba6357 100644 --- a/src/tests/test_semantic_edges.py +++ b/src/tests/test_semantic_edges.py @@ -362,6 +362,19 @@ def test_top_k_limits_per_row_neighbors(self) -> None: # Orthogonal → cosine=0 everywhere → no pairs above 0.5 assert pairs == {} + def test_top_k_caps_non_orthogonal_rows(self) -> None: + vecs = _l2_normalize(np.array([ + [1.0, 0.0], + [0.99, 0.01], + [0.5, 0.5], + ], dtype="float32")) + pairs = _topk_pairs(vecs, ["a", "b", "c"], top_k=1, min_cosine=0.0) + assert pairs == { + ("a", "b"): pytest.approx(float(vecs[0] @ vecs[1])), + ("b", "c"): pytest.approx(float(vecs[1] @ vecs[2])), + } + assert ("a", "c") not in pairs + def test_small_chunk_size_gives_same_result(self) -> None: vecs = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]], dtype="float32") ids = ["x", "y", "z"] @@ -415,6 +428,16 @@ def test_min_cosine_filter(self) -> None: assert not _topk_pairs_subset(vecs, ids, [0], top_k=1, min_cosine=1.1) + def test_subset_top_k_caps_non_orthogonal_row(self) -> None: + vecs = _l2_normalize(np.array([ + [1.0, 0.0], + [0.99, 0.01], + [0.5, 0.5], + ], dtype="float32")) + pairs = _topk_pairs_subset(vecs, ["a", "b", "c"], [0], top_k=1, min_cosine=0.0) + assert pairs == {("a", "b"): pytest.approx(float(vecs[0] @ vecs[1]))} + + class TestTopKPairsSubsetWithOptionalIndex: def test_numpy_flat_matches_exact_subset_and_filters_self(self) -> None: vecs = np.array( @@ -828,7 +851,7 @@ def test_removed_node_contaminates_neighbor(self) -> None: assert "b" in need assert "a" in unchanged - def test_new_node_does_not_contaminate_unchanged_with_unrelated_neighbors(self) -> None: + def test_new_node_recomputes_all_rows_to_preserve_full_topk_parity(self) -> None: h_a = _content_hash("text-a") h_b = _content_hash("text-b") prior = self._prior({ @@ -842,10 +865,8 @@ def test_new_node_does_not_contaminate_unchanged_with_unrelated_neighbors(self) SemanticNode("new", "text-new"), ] need, unchanged = _partition_for_incremental(nodes, prior) - assert "new" in need - # a and b don't have "new" in their prior top_k → not contaminated - assert "a" in unchanged - assert "b" in unchanged + assert need == {"a", "b", "new"} + assert unchanged == set() def test_empty_top_k_list_entry_skipped(self) -> None: h_a = _content_hash("ta") diff --git a/src/tests/test_skill_loader.py b/src/tests/test_skill_loader.py index 797fe85..4a362a1 100644 --- a/src/tests/test_skill_loader.py +++ b/src/tests/test_skill_loader.py @@ -166,6 +166,19 @@ def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): from ctx.adapters.claude_code import skill_loader importlib.reload(skill_loader) + monkeypatch.setattr(skill_loader, "SKILLS_DIR", home / ".claude" / "skills") + monkeypatch.setattr(skill_loader, "AGENTS_DIR", home / ".claude" / "agents") + monkeypatch.setattr(skill_loader, "WIKI_DIR", home / ".claude" / "skill-wiki") + monkeypatch.setattr( + skill_loader, + "PENDING_SKILLS", + home / ".claude" / "pending-skills.json", + ) + monkeypatch.setattr( + skill_loader, + "MANIFEST_PATH", + home / ".claude" / "skill-manifest.json", + ) return skill_loader, home diff --git a/src/tests/test_skill_unload.py b/src/tests/test_skill_unload.py index cbd2329..bebbfe1 100644 --- a/src/tests/test_skill_unload.py +++ b/src/tests/test_skill_unload.py @@ -39,6 +39,24 @@ def fake_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): from ctx.adapters.claude_code.install import skill_unload importlib.reload(skill_unload) + monkeypatch.setattr(skill_unload, "CLAUDE_DIR", home / ".claude") + monkeypatch.setattr( + skill_unload, + "MANIFEST_PATH", + home / ".claude" / "skill-manifest.json", + ) + monkeypatch.setattr(skill_unload, "PENDING_UNLOAD", home / ".claude" / "pending-unload.json") + monkeypatch.setattr(skill_unload, "WIKI_DIR", home / ".claude" / "skill-wiki") + monkeypatch.setattr( + skill_unload, + "SKILL_ENTITIES", + home / ".claude" / "skill-wiki" / "entities" / "skills", + ) + monkeypatch.setattr( + skill_unload, + "AGENT_ENTITIES", + home / ".claude" / "skill-wiki" / "entities" / "agents", + ) return skill_unload, home diff --git a/src/tests/test_update_repo_stats.py b/src/tests/test_update_repo_stats.py index 13654af..3e0f515 100644 --- a/src/tests/test_update_repo_stats.py +++ b/src/tests/test_update_repo_stats.py @@ -166,8 +166,21 @@ def test_test_badge_is_labeled_collected_not_passing() -> None: def test_docs_landing_test_count_is_updated() -> None: text = "CI-matrixed, 3,617 tests collected. Ships console scripts." + stats = { + "nodes": None, + "edges": None, + "skills": None, + "agents": None, + "mcps": None, + "harnesses": None, + "communities": None, + } patched = text - for pattern, replacement in urs.build_docs_replacements(tests=3619): + for pattern, replacement in urs.build_docs_replacements( + stats=stats, + tests=3619, + converted=None, + ): patched = pattern.sub(replacement, patched) assert "3,619 tests collected" in patched diff --git a/src/tests/test_validate_graph_artifacts.py b/src/tests/test_validate_graph_artifacts.py index 8612b0f..00c14e4 100644 --- a/src/tests/test_validate_graph_artifacts.py +++ b/src/tests/test_validate_graph_artifacts.py @@ -15,6 +15,7 @@ from validate_graph_artifacts import ( DEFAULT_HARNESSES, GraphArtifactError, + _validate_root_entity_overlay, _safe_tar_name, _scan_graph_json, validate_graph_artifacts, @@ -853,6 +854,53 @@ def test_scan_graph_json_handles_pretty_printed_graph() -> None: assert _scan_graph_json(BytesIO(payload)) == (2, 2, 1, 1, 1, None) +@pytest.mark.parametrize("field", ["semantic_sim", "tag_sim", "token_sim"]) +def test_overlay_validation_rejects_out_of_range_similarity_fields( + tmp_path: Path, + field: str, +) -> None: + (tmp_path / "entity-overlays.jsonl").write_text( + json.dumps({ + "nodes": [{"id": "skill:a"}], + "edges": [ + { + "source": "skill:a", + "target": "skill:b", + "weight": 0.5, + "final_weight": 0.5, + field: 2.0, + }, + ], + }) + + "\n", + encoding="utf-8", + ) + + with pytest.raises(GraphArtifactError, match=f"{field} must be 0..1"): + _validate_root_entity_overlay(tmp_path / "entity-overlays.jsonl") + + +def test_overlay_validation_rejects_weight_final_weight_drift(tmp_path: Path) -> None: + (tmp_path / "entity-overlays.jsonl").write_text( + json.dumps({ + "nodes": [{"id": "skill:a"}], + "edges": [ + { + "source": "skill:a", + "target": "skill:b", + "weight": 0.7, + "final_weight": 0.5, + }, + ], + }) + + "\n", + encoding="utf-8", + ) + + with pytest.raises(GraphArtifactError, match="weight must equal final_weight"): + _validate_root_entity_overlay(tmp_path / "entity-overlays.jsonl") + + def test_scan_graph_json_extracts_top_level_graph_export_id() -> None: graph = { "directed": False, diff --git a/src/update_repo_stats.py b/src/update_repo_stats.py index b65ea13..0c8c874 100644 --- a/src/update_repo_stats.py +++ b/src/update_repo_stats.py @@ -26,6 +26,7 @@ import subprocess import sys import tarfile +from collections.abc import Mapping from pathlib import Path REPO_ROOT = Path(__file__).resolve().parent.parent @@ -426,7 +427,11 @@ def format_edges(n: int) -> str: return str(n) -def build_replacements(stats: dict, tests: int | None, converted: int | None) -> list[tuple[re.Pattern, str]]: +def build_replacements( + stats: Mapping[str, int | None], + tests: int | None, + converted: int | None, +) -> list[tuple[re.Pattern, str]]: """Return (regex, replacement) pairs for every stat.""" reps: list[tuple[re.Pattern, str]] = [] @@ -521,9 +526,11 @@ def build_replacements(stats: dict, tests: int | None, converted: int | None) -> f"**{n:,} entity pages** ({stats['skills']:,} skills + {stats['agents']:,} agents)", )) - if stats.get("skills_sh_entries") and stats.get("skills_sh_bodies"): - entries = int(stats["skills_sh_entries"]) - bodies = int(stats["skills_sh_bodies"]) + skills_sh_entries = stats.get("skills_sh_entries") + skills_sh_bodies = stats.get("skills_sh_bodies") + if skills_sh_entries is not None and skills_sh_bodies is not None: + entries = int(skills_sh_entries) + bodies = int(skills_sh_bodies) skill_pages = int(stats.get("skills") or entries) reps.append(( re.compile( @@ -571,13 +578,19 @@ def build_replacements(stats: dict, tests: int | None, converted: int | None) -> return reps -def build_docs_replacements(tests: int | None) -> list[tuple[re.Pattern[str], str]]: +def build_docs_replacements( + stats: Mapping[str, int | None], + tests: int | None, + converted: int | None, +) -> list[tuple[re.Pattern[str], str]]: + reps = build_replacements(stats, tests, converted) if tests is None: - return [] - return [( + return reps + reps.append(( re.compile(r"[\d,]+\s+tests collected"), f"{tests:,} tests collected", - )] + )) + return reps def patch_readme(check_only: bool = False) -> int: @@ -595,7 +608,7 @@ def patch_readme(check_only: bool = False) -> int: continue replacements = ( build_replacements(stats, tests, converted) - if target == README else build_docs_replacements(tests) + if target == README else build_docs_replacements(stats, tests, converted) ) original = target.read_text(encoding="utf-8") patched = original diff --git a/src/validate_graph_artifacts.py b/src/validate_graph_artifacts.py index a7662bb..2397d71 100644 --- a/src/validate_graph_artifacts.py +++ b/src/validate_graph_artifacts.py @@ -58,6 +58,14 @@ _SEMANTIC_SIM_RE = re.compile( rb'"semantic_sim"\s*:\s*(-?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)', ) +_EDGE_SCORE_FIELDS = ( + "weight", + "final_weight", + "similarity_score", + "semantic_sim", + "tag_sim", + "token_sim", +) _WINDOWS_DRIVE_RE = re.compile(r"^[A-Za-z]:") _PREVIEW_HTML_FILES = ( "sample-top60.html", @@ -175,7 +183,8 @@ def _validate_root_entity_overlay(path: Path) -> None: f"graph/entity-overlays.jsonl line {lineno} edge {index} " "must contain source/target", ) - for field in ("weight", "final_weight", "similarity_score"): + numeric_scores: dict[str, float] = {} + for field in _EDGE_SCORE_FIELDS: value = edge.get(field) if value is not None and ( not isinstance(value, int | float) or not 0 <= float(value) <= 1 @@ -184,6 +193,17 @@ def _validate_root_entity_overlay(path: Path) -> None: f"graph/entity-overlays.jsonl line {lineno} edge {index} " f"{field} must be 0..1", ) + if value is not None: + numeric_scores[field] = float(value) + if ( + "weight" in numeric_scores + and "final_weight" in numeric_scores + and abs(numeric_scores["weight"] - numeric_scores["final_weight"]) > 1e-9 + ): + raise GraphArtifactError( + f"graph/entity-overlays.jsonl line {lineno} edge {index} " + "weight must equal final_weight", + ) records += 1 if records == 0: raise GraphArtifactError("graph/entity-overlays.jsonl has no overlay records")