diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c59bb07..b51b76c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -74,7 +74,7 @@ jobs:
run: npm ci
- name: Run unit tests with coverage
- run: npm test -- --coverage
+ run: npm run test:coverage
- name: Upload coverage
if: always()
diff --git a/.github/workflows/dod.yml b/.github/workflows/dod.yml
index 0ee9368..bf01dce 100644
--- a/.github/workflows/dod.yml
+++ b/.github/workflows/dod.yml
@@ -49,7 +49,7 @@ jobs:
# Roda testes com coverage para gerar relatório
- name: Run unit tests with coverage
- run: npm test -- --coverage
+ run: npm run test:coverage
# Bloqueia se coverage < 80%
- name: Check coverage threshold (>=80%)
diff --git a/.gitignore b/.gitignore
index 98b4f6a..7347476 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,14 @@ docs-site/.docusaurus/
# TypeScript
*.tsbuildinfo
+# Python
+__pycache__/
+*.py[cod]
+*.egg-info/
+.pytest_cache/
+.venv/
+venv/
+
# Logs
*.log
npm-debug.log*
@@ -90,6 +98,7 @@ docs/**
!docs/YOOL_TUPLE_HAMT.md
scripts/**
!scripts/build_hamt.py
+!scripts/coverage.js
!scripts/skillopt/
!scripts/skillopt/*.js
playwright-report/**
@@ -98,6 +107,8 @@ tests/**
!tests/unit/*.test.js
!tests/e2e/
!tests/e2e/*.spec.ts
+!tests/python/
+!tests/python/*.py
test-results/**
coverage/**
bootstrap.ps1
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bda043b..9dac081 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ Format follows [Keep a Changelog 1.1.0](https://keepachangelog.com/en/1.1.0/) an
## [Unreleased]
### Added
+- Standalone Python distribution `simplicio-mapper` on PyPI: dependency-free
+ `simplicio_mapper.mapper` port of the Node mapper plus a `map` / `update` CLI
+ exposed as the `simplicio-mapper` and `llm-project-mapper` console scripts.
+ Generates the same `.simplicio/project-map.json` and `precedent-index.json`
+ without requiring a Node toolchain.
- `map` / `update` CLI subcommands for generating and incrementally refreshing
`.simplicio/project-map.json` and `.simplicio/precedent-index.json`.
- Rich machine-readable mapper artifacts with file inventory, roles, imports,
diff --git a/PYPI.md b/PYPI.md
new file mode 100644
index 0000000..dfea25e
--- /dev/null
+++ b/PYPI.md
@@ -0,0 +1,72 @@
+# simplicio-mapper
+
+Python-first project mapper for the Simplicio ecosystem. It scans a repository
+and emits two machine-readable artifacts that agents and tooling can consume
+without parsing the human-readable markdown docs:
+
+- `.simplicio/project-map.json` (`simplicio.project-map/v1`) — file inventory,
+ architecture signals, entry points, tests, modules, entities, dependencies
+ and recent changes.
+- `.simplicio/precedent-index.json` (`simplicio.precedent-index/v1`) —
+ high-signal code examples tagged by change type, file, language, roles and
+ snippet.
+
+The full contract is documented in
+[SIMPLICIO_INTEGRATION.md](https://github.com/wesleysimplicio/simplicio-mapper/blob/main/SIMPLICIO_INTEGRATION.md).
+
+## Install
+
+```bash
+pip install simplicio-mapper
+```
+
+## Usage
+
+```bash
+# Map the current directory into .simplicio/
+simplicio-mapper map
+
+# Refresh artifacts and record changed files since the last run
+simplicio-mapper update
+
+# Map another project root, with hints when .starter-meta.json is absent
+simplicio-mapper map --root path/to/project --stack python --product-name "My App"
+
+# Re-run automatically while files change locally
+simplicio-mapper map --watch
+```
+
+The `llm-project-mapper` console script is provided as an alias.
+
+### Options
+
+| Option | Description |
+|---|---|
+| `--root <path>` | Project root to map. Defaults to the current directory. |
+| `--out <dir>` | Artifact directory. Defaults to `.simplicio`. |
+| `--stack <name>` | Stack hint when `.starter-meta.json` is absent. |
+| `--product-name <name>` | Product name hint when `.starter-meta.json` is absent. |
+| `--incremental` | Record changed files and update existing artifacts. |
+| `--watch` | Re-run mapping when local files change. |
+| `--silent` | Minimal output. |
+| `-V`, `--version` | Show version and exit. |
+| `-h`, `--help` | Show help. |
+
+## Consuming the artifacts
+
+```python
+from pathlib import Path
+import json
+
+base = Path(".simplicio")
+project_map = json.loads((base / "project-map.json").read_text())
+precedents = json.loads((base / "precedent-index.json").read_text())
+
+top_files = sorted(
+ project_map["files"], key=lambda f: f.get("importance", 0), reverse=True
+)[:8]
+```
+
+## License
+
+MIT
diff --git a/README.md b/README.md
index ff9d776..f3bc326 100644
--- a/README.md
+++ b/README.md
@@ -48,6 +48,22 @@ npx @wesleysimplicio/llm-project-mapper map --incremental
npx @wesleysimplicio/llm-project-mapper update
```
+### New: standalone Python CLI
+
+The mapper now ships as a dependency-free Python package, so Python-first teams
+can generate the same artifacts without a Node toolchain:
+
+```bash
+pip install simplicio-mapper
+
+simplicio-mapper map # write .simplicio/ artifacts
+simplicio-mapper update # refresh and record changed files
+simplicio-mapper map --watch # re-map as files change locally
+```
+
+Both `simplicio-mapper` and `llm-project-mapper` console scripts are installed,
+and the Python output follows the same schema as the Node mapper's artifacts.
+
Use `--watch` during long agent sessions to keep the map fresh. The schema and
Python consumption example live in [SIMPLICIO_INTEGRATION.md](SIMPLICIO_INTEGRATION.md).
diff --git a/README.pt-BR.md b/README.pt-BR.md
index 41d0cf5..9213261 100644
--- a/README.pt-BR.md
+++ b/README.pt-BR.md
@@ -47,6 +47,22 @@ npx @wesleysimplicio/llm-project-mapper map --incremental
npx @wesleysimplicio/llm-project-mapper update
```
+### Novidade: CLI Python standalone
+
+O mapper agora tambem e distribuido como pacote Python sem dependencias, para
+times Python-first gerarem os mesmos artefatos sem toolchain Node:
+
+```bash
+pip install simplicio-mapper
+
+simplicio-mapper map # escreve os artefatos em .simplicio/
+simplicio-mapper update # atualiza e registra arquivos alterados
+simplicio-mapper map --watch # remapeia conforme arquivos mudam
+```
+
+Os console scripts `simplicio-mapper` e `llm-project-mapper` sao instalados, e a
+saida Python e compativel com o schema do mapper Node.
+
Use `--watch` durante sessoes longas de agentes para manter o mapa fresco. O
schema e um exemplo de consumo em Python ficam em
[SIMPLICIO_INTEGRATION.md](SIMPLICIO_INTEGRATION.md).
diff --git a/package.json b/package.json
index 2dae705..6400d75 100644
--- a/package.json
+++ b/package.json
@@ -82,7 +82,7 @@
},
"scripts": {
"test": "node --test",
- "test:coverage": "node --test --experimental-test-coverage",
+ "test:coverage": "node scripts/coverage.js",
"test:cli": "node bin/cli.js --help",
"test:e2e": "playwright test",
"lint": "node scripts/lint.js",
diff --git a/scripts/coverage.js b/scripts/coverage.js
new file mode 100644
index 0000000..2bcf71a
--- /dev/null
+++ b/scripts/coverage.js
@@ -0,0 +1,65 @@
+#!/usr/bin/env node
+'use strict';
+
+// Runs the Node.js built-in test runner with V8 coverage and converts the
+// "all files" summary row into coverage/coverage-summary.json (Istanbul
+// json-summary shape) so the DoD gate can read total.lines.pct.
+//
+// Parses the textual coverage table instead of the lcov reporter so it works
+// across the Node 20 and 22 CI matrix (the lcov reporter is not available on
+// every supported version). Dependency-free.
+
+const fs = require('node:fs');
+const path = require('node:path');
+const { spawnSync } = require('node:child_process');
+
+const COVERAGE_DIR = path.resolve(process.cwd(), 'coverage');
+const SUMMARY_PATH = path.join(COVERAGE_DIR, 'coverage-summary.json');
+
+fs.mkdirSync(COVERAGE_DIR, { recursive: true });
+// Drop any summary left over from an earlier run so a failed run can never be
+// judged against stale numbers.
+fs.rmSync(SUMMARY_PATH, { force: true });
+
+const result = spawnSync(
+  process.execPath,
+  ['--test', '--experimental-test-coverage'],
+  { encoding: 'utf8', maxBuffer: 64 * 1024 * 1024 },
+);
+
+if (result.error) {
+  console.error(`coverage run failed to start: ${result.error.message}`);
+  process.exit(1);
+}
+
+// `status` is null when the child was terminated by a signal. Passing null to
+// process.exit() would exit 0, so normalise it to a failing code once and use
+// that value everywhere below.
+const exitCode = typeof result.status === 'number' ? result.status : 1;
+
+const stdout = result.stdout || '';
+const stderr = result.stderr || '';
+process.stdout.write(stdout);
+if (stderr) process.stderr.write(stderr);
+
+// Matches the summary row, tolerating the "# " (tap) or "ℹ " (spec) prefix:
+//   all files | 91.32 | 69.97 | 88.39 |
+// Column order is line %, branch %, function %.
+const match = stdout.match(/all files\s*\|\s*([0-9.]+)\s*\|\s*([0-9.]+)\s*\|\s*([0-9.]+)/);
+
+if (!match) {
+  // A failing test run is reported with its own exit code; a passing run
+  // without a parsable summary row is still an error for the gate.
+  if (exitCode !== 0) process.exit(exitCode);
+  console.error('::error::could not parse coverage summary row from test output');
+  process.exit(1);
+}
+
+const linesPct = Number(match[1]);
+const branchesPct = Number(match[2]);
+const functionsPct = Number(match[3]);
+// Only percentages are available from the text table; the absolute counters
+// are written as zero to keep the json-summary shape.
+const metric = (pct) => ({ total: 0, covered: 0, skipped: 0, pct });
+
+const summary = {
+  total: {
+    lines: metric(linesPct),
+    statements: metric(linesPct),
+    functions: metric(functionsPct),
+    branches: metric(branchesPct),
+  },
+};
+
+fs.writeFileSync(SUMMARY_PATH, JSON.stringify(summary, null, 2) + '\n');
+console.log(`coverage summary written to ${path.relative(process.cwd(), SUMMARY_PATH)} `
+  + `(lines ${linesPct}%, functions ${functionsPct}%, branches ${branchesPct}%)`);
+
+process.exit(exitCode);
diff --git a/simplicio_mapper/cli.py b/simplicio_mapper/cli.py
new file mode 100644
index 0000000..9019a87
--- /dev/null
+++ b/simplicio_mapper/cli.py
@@ -0,0 +1,156 @@
+"""Command-line entry point for simplicio-mapper.
+
+Mirrors ``bin/map.js``: generates or refreshes the machine-readable mapper
+artifacts under ``.simplicio/``. Exposed as the ``simplicio-mapper`` and
+``llm-project-mapper`` console scripts (see ``pyproject.toml``).
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import time
+from typing import Sequence
+
+from . import __version__
+from .mapper import write_mapping_artifacts
+
+# Usage text printed for -h/--help. Angle-bracket placeholders name the value
+# that each value-taking option expects.
+HELP_TEXT = """simplicio-mapper map
+
+Generate or update machine-readable mapper artifacts.
+
+USAGE
+  simplicio-mapper map [--root <path>] [--incremental] [--watch]
+  simplicio-mapper update [--root <path>] [--watch]
+
+OPTIONS
+  --root <path>          Project root to map. Defaults to cwd.
+  --stack <name>         Stack hint when .starter-meta.json is absent.
+  --product-name <name>  Product name hint when .starter-meta.json is absent.
+  --out <dir>            Artifact directory. Defaults to .simplicio.
+  --incremental          Record changed files and update existing artifacts.
+  --watch                Re-run mapping when local files change.
+  --silent               Minimal output.
+  -V, --version          Show version and exit.
+  -h, --help             Show this help
+"""
+
+
+def _read_json_safe(file: str) -> dict:
+    """Load a JSON object from ``file``; return ``{}`` when that is not possible.
+
+    An unreadable file, invalid JSON, and a document whose top-level value is
+    not an object (for example a list) all yield an empty dict, so callers can
+    always treat the result as a mapping.
+    """
+    try:
+        with open(file, "r", encoding="utf-8") as handle:
+            data = json.load(handle)
+    except (OSError, ValueError):
+        return {}
+    # Valid JSON may be a list or scalar; the caller wraps the result in dict().
+    return data if isinstance(data, dict) else {}
+
+
+def _parse_args(argv: Sequence[str]) -> dict:
+    """Parse CLI arguments into an options dict.
+
+    ``argv`` excludes the program name. A leading ``map`` or ``update`` token is
+    consumed as the command; ``update`` switches ``incremental`` on.
+
+    Returns a dict with the keys ``root``, ``out``, ``stack``, ``product_name``,
+    ``incremental``, ``watch`` and ``silent``.
+
+    Exits the process with status 0 after printing help or the version, and
+    with status 2 for an unknown option or a value option given without a value.
+    """
+    opts = {
+        "root": os.getcwd(),
+        "out": ".simplicio",
+        "stack": "",
+        "product_name": "",
+        "incremental": False,
+        "watch": False,
+        "silent": False,
+    }
+    # Options that consume the following argument, mapped to their opts key.
+    value_options = {
+        "--root": "root",
+        "--out": "out",
+        "--stack": "stack",
+        "--product-name": "product_name",
+    }
+    # Boolean switches, mapped to their opts key.
+    flag_options = {
+        "--incremental": "incremental",
+        "--watch": "watch",
+        "--silent": "silent",
+    }
+    command = "update" if argv and argv[0] == "update" else "map"
+    if command == "update":
+        opts["incremental"] = True
+    i = 1 if argv and argv[0] in ("map", "update") else 0
+    while i < len(argv):
+        arg = argv[i]
+        if arg in value_options:
+            i += 1
+            if i >= len(argv):
+                # Report a usage error instead of failing with IndexError.
+                print(f"Missing value for map option: {arg}", file=sys.stderr)
+                print("Run `simplicio-mapper map --help` for usage.", file=sys.stderr)
+                sys.exit(2)
+            opts[value_options[arg]] = argv[i]
+        elif arg in flag_options:
+            opts[flag_options[arg]] = True
+        elif arg in ("-h", "--help"):
+            print(HELP_TEXT)
+            sys.exit(0)
+        elif arg in ("-V", "--version"):
+            print(__version__)
+            sys.exit(0)
+        else:
+            print(f"Unknown map option: {arg}", file=sys.stderr)
+            print("Run `simplicio-mapper map --help` for usage.", file=sys.stderr)
+            sys.exit(2)
+        i += 1
+    return opts
+
+
+def _run_once(opts: dict) -> dict:
+    """Run one mapping pass for the parsed options and return its result.
+
+    Metadata is read from ``.starter-meta.json`` under the project root; the
+    ``stack`` and ``product_name`` options override it when they are non-empty.
+    Progress lines go to ``print`` unless ``silent`` is set.
+    """
+    project_root = os.path.abspath(opts["root"])
+    meta = dict(_read_json_safe(os.path.join(project_root, ".starter-meta.json")))
+    for key in ("stack", "product_name"):
+        if opts[key]:
+            meta[key] = opts[key]
+
+    def _discard(_line):
+        return None
+
+    log = _discard if opts["silent"] else print
+    return write_mapping_artifacts(
+        cwd=project_root,
+        meta=meta,
+        incremental=opts["incremental"],
+        output_dir=opts["out"],
+        log=log,
+    )
+
+
+def _signature(root: str, out: str) -> tuple:
+    """Return a sorted tuple of ``(path, mtime_ns, size)`` for files under ``root``.
+
+    Directories named ``.git`` or ``node_modules`` and the artifact directory
+    ``out`` (resolved against ``root``) are not descended into. Files that
+    cannot be stat'ed are left out.
+    """
+    artifact_dir = os.path.abspath(os.path.join(root, out))
+    ignored_names = (".git", "node_modules")
+    snapshot = []
+    for folder, subdirs, filenames in os.walk(root):
+        kept = []
+        for sub in subdirs:
+            if sub in ignored_names:
+                continue
+            if os.path.abspath(os.path.join(folder, sub)) == artifact_dir:
+                continue
+            kept.append(sub)
+        # Assign in place so os.walk prunes the dropped directories.
+        subdirs[:] = kept
+        for filename in filenames:
+            full_path = os.path.join(folder, filename)
+            try:
+                info = os.stat(full_path)
+            except OSError:
+                continue
+            snapshot.append((full_path, info.st_mtime_ns, info.st_size))
+    snapshot.sort()
+    return tuple(snapshot)
+
+
+def _watch(opts: dict) -> None:
+    """Poll the project tree and re-run an incremental mapping on every change.
+
+    The file signature is compared every 0.5 seconds. A failing mapping run is
+    reported on stderr and the loop continues; Ctrl-C ends the loop quietly.
+    """
+    watched_root = os.path.abspath(opts["root"])
+    print(f"watching {watched_root} for mapper updates...")
+    previous = _signature(watched_root, opts["out"])
+    try:
+        while True:
+            time.sleep(0.5)
+            snapshot = _signature(watched_root, opts["out"])
+            if snapshot == previous:
+                continue
+            previous = snapshot
+            try:
+                _run_once(dict(opts, incremental=True))
+            except Exception as error:  # noqa: BLE001 - watch loop must not crash
+                print(f"map update failed: {error}", file=sys.stderr)
+    except KeyboardInterrupt:
+        return
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """CLI entry point: map once, then keep watching when ``--watch`` was given.
+
+    ``argv`` defaults to ``sys.argv[1:]``. Returns 0 when it returns normally.
+    """
+    args = list(sys.argv[1:]) if argv is None else list(argv)
+    opts = _parse_args(args)
+    _run_once(opts)
+    if not opts["watch"]:
+        return 0
+    _watch(opts)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/simplicio_mapper/mapper.py b/simplicio_mapper/mapper.py
new file mode 100644
index 0000000..09f9e7e
--- /dev/null
+++ b/simplicio_mapper/mapper.py
@@ -0,0 +1,536 @@
+"""Project mapper that emits the Simplicio machine-readable artifacts.
+
+This is the Python port of ``bin/mapper-artifacts.js``. It produces
+``.simplicio/project-map.json`` (schema ``simplicio.project-map/v1``) and
+``.simplicio/precedent-index.json`` (schema ``simplicio.precedent-index/v1``)
+as documented in ``SIMPLICIO_INTEGRATION.md``. Pure standard library, no
+third-party dependencies.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import os
+import re
+import subprocess
+from datetime import datetime, timezone
+from typing import Any, Callable
+
+# Schema identifiers and version number written into the generated artifacts.
+ARTIFACT_SCHEMA = "simplicio.project-map/v1"
+PRECEDENT_SCHEMA = "simplicio.precedent-index/v1"
+ARTIFACT_VERSION = 1
+
+# Lower-cased file extensions that _collect_text_files accepts. The empty
+# string admits files without an extension.
+TEXT_EXTS = {
+    ".md", ".txt", ".json", ".jsonc", ".yml", ".yaml", ".toml",
+    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
+    ".py", ".go", ".rs", ".java", ".kt", ".php", ".rb", ".cs",
+    ".cshtml", ".razor", ".sh", ".ps1", ".env", "",
+}
+
+# Entry names that _walk skips. The check is by name only, so a regular file
+# with one of these names is skipped as well.
+SKIP_DIRS = {
+    ".git", "node_modules", "dist", "build", "out", "coverage",
+    ".next", ".nuxt", "playwright-report", "test-results", ".turbo",
+    ".venv", "venv", "__pycache__", ".idea", ".vscode", ".simplicio",
+    ".catalog", ".receipts",
+}
+
+# Base names that _roles_for always tags with the "config" role.
+CONFIG_FILES = {
+    "package.json", "pyproject.toml", "requirements.txt", "go.mod", "Cargo.toml",
+    "pom.xml", "build.gradle", "settings.gradle", "tsconfig.json",
+    "vite.config.ts", "next.config.js", "angular.json", "Dockerfile",
+}
+
+# Extension-to-language lookup used by _language_for.
+LANGUAGE_BY_EXT = {
+    ".js": "javascript",
+    ".jsx": "javascript",
+    ".mjs": "javascript",
+    ".cjs": "javascript",
+    ".ts": "typescript",
+    ".tsx": "typescript",
+    ".py": "python",
+    ".go": "go",
+    ".rs": "rust",
+    ".java": "java",
+    ".kt": "kotlin",
+    ".php": "php",
+    ".rb": "ruby",
+    ".cs": "csharp",
+    ".cshtml": "razor",
+    ".razor": "razor",
+    ".md": "markdown",
+    ".json": "json",
+    ".yaml": "yaml",
+    ".yml": "yaml",
+    ".toml": "toml",
+    ".sh": "shell",
+    ".ps1": "powershell",
+}
+
+# Lower-cased file stems that _roles_for treats as entry points.
+ENTRYPOINT_STEMS = {"index", "main", "server", "app", "program", "cli"}
+# Tokens that _token_words drops from its output.
+TOKEN_STOPWORDS = {"src", "lib", "test", "tests", "index", "main"}
+
+
+def _iso(dt: datetime) -> str:
+    """Format ``dt`` as a UTC ISO-8601 string with milliseconds and a ``Z`` suffix."""
+    utc_stamp = dt.astimezone(timezone.utc).isoformat(timespec="milliseconds")
+    return utc_stamp.replace("+00:00", "Z")
+
+
+def _now_iso() -> str:
+    """Return the current UTC time formatted by ``_iso``."""
+    current = datetime.now(timezone.utc)
+    return _iso(current)
+
+
+def _normalize_rel(file: str) -> str:
+    """Return ``file`` with the platform path separator replaced by ``/``."""
+    segments = file.split(os.sep)
+    return "/".join(segments)
+
+
+def _read_safe(file: str) -> str:
+    """Read ``file`` as UTF-8 text, replacing undecodable bytes.
+
+    Returns an empty string when the file cannot be opened or read.
+    """
+    content = ""
+    try:
+        with open(file, "r", encoding="utf-8", errors="replace") as stream:
+            content = stream.read()
+    except OSError:
+        content = ""
+    return content
+
+
+def _sha256(text: str) -> str:
+    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
+    digest = hashlib.sha256()
+    digest.update(text.encode("utf-8"))
+    return digest.hexdigest()
+
+
+def _parse_json_safe(file: str) -> dict:
+    """Parse ``file`` as a JSON object; return ``{}`` when that is not possible.
+
+    A missing or empty file, invalid JSON, and a document whose top-level value
+    is not an object all yield an empty dict, so callers can use ``.get``.
+    """
+    try:
+        data = json.loads(_read_safe(file) or "{}")
+    except (ValueError, TypeError):
+        return {}
+    # e.g. a manifest containing just a list must not reach callers as a list.
+    return data if isinstance(data, dict) else {}
+
+
+def _walk(root: str):
+    """Yield paths of regular files under ``root``, visiting entries in name order.
+
+    Entries whose name is in ``SKIP_DIRS`` are ignored. Symlinks are neither
+    followed nor yielded. A directory that cannot be listed yields nothing.
+    """
+    try:
+        children = list(os.scandir(root))
+    except OSError:
+        return
+    children.sort(key=lambda child: child.name)
+    for child in children:
+        if child.name in SKIP_DIRS:
+            continue
+        if child.is_dir(follow_symlinks=False):
+            yield from _walk(child.path)
+            continue
+        if child.is_file(follow_symlinks=False):
+            yield child.path
+
+
+def _language_for(file: str) -> str:
+    """Return the language label for ``file``.
+
+    ``Dockerfile`` maps to ``"dockerfile"``. Otherwise the lower-cased extension
+    is looked up in ``LANGUAGE_BY_EXT``, falling back to the extension without
+    its dot, or ``"text"`` when there is no extension.
+    """
+    if os.path.basename(file) == "Dockerfile":
+        return "dockerfile"
+    suffix = os.path.splitext(file)[1].lower()
+    fallback = suffix[1:] if suffix else "text"
+    return LANGUAGE_BY_EXT.get(suffix, fallback)
+
+
+def _git_status_map(cwd: str) -> dict[str, str]:
+    """Return ``{path: status}`` for entries reported by ``git status`` in ``cwd``.
+
+    Uses the NUL-delimited porcelain format (``-z``), in which paths are never
+    quoted, so names containing spaces or special characters are reported
+    verbatim. For renames and copies the new path is recorded.
+
+    Returns an empty dict when git cannot be run, times out after 3 seconds,
+    or exits with a non-zero status.
+    """
+    out: dict[str, str] = {}
+    try:
+        result = subprocess.run(
+            ["git", "status", "--porcelain", "-z"],
+            cwd=cwd,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            timeout=3,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return out
+    if result.returncode != 0:
+        return out
+    fields = iter((result.stdout or "").split("\0"))
+    for entry in fields:
+        # Each entry is "XY <path>"; anything shorter is the trailing empty field.
+        if len(entry) < 4:
+            continue
+        code = entry[:2]
+        path = entry[3:]
+        if "R" in code or "C" in code:
+            # With -z the original path follows as its own field; discard it.
+            next(fields, None)
+        out[_normalize_rel(path)] = code.strip() or "modified"
+    return out
+
+
+def _collect_text_files(cwd: str) -> list[str]:
+    """Return the sorted paths of text files under ``cwd``.
+
+    A file is kept when its lower-cased extension is in ``TEXT_EXTS`` and its
+    size is at most 250,000 bytes. Files whose size cannot be read are dropped.
+    """
+    kept = []
+    for candidate in _walk(cwd):
+        suffix = os.path.splitext(candidate)[1].lower()
+        if suffix not in TEXT_EXTS:
+            continue
+        try:
+            small_enough = os.path.getsize(candidate) <= 250_000
+        except OSError:
+            small_enough = False
+        if small_enough:
+            kept.append(candidate)
+    kept.sort()
+    return kept
+
+
+def _parse_imports(text: str, language: str) -> list[str]:
+    """Return the sorted import targets found in ``text`` for ``language``.
+
+    JavaScript/TypeScript, Python, C#/Razor and Go are recognised; any other
+    language yields an empty list. Duplicates are removed and only the first
+    20 distinct targets are kept before sorting.
+    """
+    if language in ("javascript", "typescript"):
+        specs = [
+            (r"import\s+[^'\"]*['\"]([^'\"]+)['\"]", 0),
+            (r"require\(['\"]([^'\"]+)['\"]\)", 0),
+        ]
+    elif language == "python":
+        specs = [
+            (r"^\s*from\s+([A-Za-z0-9_.]+)\s+import\s+", re.MULTILINE),
+            (r"^\s*import\s+([A-Za-z0-9_.]+)", re.MULTILINE),
+        ]
+    elif language in ("csharp", "razor"):
+        specs = [(r"^\s*using\s+([A-Za-z0-9_.]+)\s*;", re.MULTILINE)]
+    elif language == "go":
+        specs = [(r'^\s*import\s+"([^"]+)"', re.MULTILINE)]
+    else:
+        specs = []
+    # A dict keeps first-seen order while dropping duplicates.
+    seen: dict[str, None] = {}
+    for source, flags in specs:
+        for hit in re.finditer(source, text, flags):
+            seen.setdefault(hit.group(1), None)
+    first_twenty = list(seen)[:20]
+    first_twenty.sort()
+    return first_twenty
+
+
+# Regexes whose first group captures a declared class or function name.
+_SYMBOL_PATTERNS = [
+    re.compile(source)
+    for source in (
+        r"\bclass\s+([A-Z][A-Za-z0-9_]*)",
+        r"\bfunction\s+([A-Za-z0-9_]+)",
+        r"\bexport\s+(?:async\s+)?function\s+([A-Za-z0-9_]+)",
+        r"\bexport\s+const\s+([A-Za-z0-9_]+)",
+        r"\bdef\s+([A-Za-z0-9_]+)",
+        r"\bfunc\s+([A-Za-z0-9_]+)",
+    )
+]
+
+
+def _parse_symbols(text: str) -> list[str]:
+    """Return the sorted symbol names declared in ``text``.
+
+    Names are collected pattern by pattern, de-duplicated in first-seen order,
+    capped at 30 and then sorted.
+    """
+    names = [hit.group(1) for rx in _SYMBOL_PATTERNS for hit in rx.finditer(text)]
+    first_thirty = list(dict.fromkeys(names))[:30]
+    first_thirty.sort()
+    return first_thirty
+
+
+# Path and file-name heuristics used by _roles_for.
+_RE_TEST_PATH = re.compile(r"(\b|/)(__tests__|tests?|specs?)(/|\b)", re.IGNORECASE)
+_RE_TEST_FILE = re.compile(r"\.(test|spec)\.[^.]+$", re.IGNORECASE)
+_RE_CONFIG = re.compile(r"config|rc$|\.config\.", re.IGNORECASE)
+_RE_ROUTE = re.compile(r"routes?|controllers?|pages?|app/", re.IGNORECASE)
+_RE_UI = re.compile(r"components?|views?", re.IGNORECASE)
+_RE_DOMAIN = re.compile(r"services?|repositories?|models?|entities?", re.IGNORECASE)
+
+
+def _roles_for(rel: str, pkg: dict) -> list[str]:
+    """Return the sorted role tags for the project-relative path ``rel``.
+
+    Possible roles are ``test``, ``config``, ``entrypoint``, ``route``, ``ui``
+    and ``domain``. ``pkg`` is the parsed package manifest; its ``main`` and
+    ``bin`` fields mark entry points in addition to well-known file stems.
+    """
+    base = os.path.basename(rel)
+    stem = re.sub(r"\.[^.]+$", "", base).lower()
+
+    main_value = ""
+    if isinstance(pkg.get("main"), str):
+        main_value = _normalize_rel(pkg["main"])
+
+    bin_field = pkg.get("bin")
+    bin_values = []
+    if isinstance(bin_field, str):
+        bin_values = [_normalize_rel(bin_field)]
+    elif isinstance(bin_field, dict):
+        bin_values = [_normalize_rel(v) for v in bin_field.values() if isinstance(v, str)]
+
+    checks = (
+        ("test", bool(_RE_TEST_PATH.search(rel) or _RE_TEST_FILE.search(base))),
+        ("config", bool(base in CONFIG_FILES or _RE_CONFIG.search(base))),
+        ("entrypoint", main_value == rel or rel in bin_values or stem in ENTRYPOINT_STEMS),
+        ("route", bool(_RE_ROUTE.search(rel))),
+        ("ui", bool(_RE_UI.search(rel))),
+        ("domain", bool(_RE_DOMAIN.search(rel))),
+    )
+    return sorted({role for role, matched in checks if matched})
+
+
+def _importance_for(meta: dict) -> float:
+    """Score a file inventory entry between 0.12 and 1.0.
+
+    Starts at 0.12 and adds a bonus per role (entrypoint, test, config,
+    domain), for having imports, for having exports, and for a git status
+    other than ``"clean"``. The sum is rounded to two decimals and capped at 1.0.
+    """
+    role_bonuses = (
+        ("entrypoint", 0.45),
+        ("test", 0.25),
+        ("config", 0.2),
+        ("domain", 0.2),
+    )
+    score = 0.12
+    roles = meta["roles"]
+    for role, bonus in role_bonuses:
+        if role in roles:
+            score += bonus
+    for field in ("imports", "exports"):
+        if meta[field]:
+            score += 0.08
+    status = meta["git_status"]
+    if status and status != "clean":
+        score += 0.2
+    return min(1.0, round(score, 2))
+
+
+# Lower-to-upper camelCase boundary, and runs of non-alphanumeric characters.
+_RE_CAMEL = re.compile(r"([a-z])([A-Z])")
+_RE_NON_ALNUM = re.compile(r"[^A-Za-z0-9]+")
+
+
+def _token_words(value: Any) -> list[str]:
+    """Split ``value`` into lower-cased word tokens.
+
+    camelCase boundaries and non-alphanumeric runs separate tokens. Tokens of
+    two characters or fewer and those in ``TOKEN_STOPWORDS`` are dropped.
+    """
+    spaced = _RE_CAMEL.sub(r"\1 \2", str(value or ""))
+    lowered = (piece.lower() for piece in _RE_NON_ALNUM.split(spaced))
+    return [word for word in lowered if len(word) > 2 and word not in TOKEN_STOPWORDS]
+
+
+def _collect_entities(files: list[dict]) -> list[dict]:
+    """Return up to 30 ``{"name", "score"}`` entities ranked by score.
+
+    Each token of a file's stem counts 1 and each token of one of its exported
+    symbols counts 2. Ties are ordered by name.
+    """
+    tally: dict[str, int] = {}
+
+    def bump(text: str, weight: int) -> None:
+        for word in _token_words(text):
+            tally[word] = tally.get(word, 0) + weight
+
+    for entry in files:
+        stem = os.path.basename(entry["path"])
+        suffix = os.path.splitext(entry["path"])[1]
+        if suffix and stem.endswith(suffix):
+            stem = stem[: -len(suffix)]
+        bump(stem, 1)
+        for symbol in entry.get("exports", []):
+            bump(symbol, 2)
+
+    ranked = sorted(tally.items(), key=lambda pair: (-pair[1], pair[0]))
+    return [{"name": word, "score": total} for word, total in ranked[:30]]
+
+
+# (signal name, regex) pairs searched in the lower-cased project text.
+_ARCH_CHECKS = [
+    (label, re.compile(source))
+    for label, source in (
+        ("nextjs", r"next"),
+        ("react", r"react"),
+        ("vue", r"vue"),
+        ("angular", r"angular|@angular"),
+        ("express", r"express"),
+        ("nestjs", r"nestjs|@nestjs"),
+        ("fastapi", r"fastapi"),
+        ("django", r"django"),
+        ("dotnet", r"aspnetcore|\.csproj|dotnet"),
+        ("go", r"\bgo\.mod\b|\bgin\b|\bfiber\b"),
+        ("rust", r"cargo\.toml|actix|axum"),
+        ("playwright", r"playwright"),
+        ("stripe", r"stripe"),
+        ("prisma", r"prisma"),
+    )
+]
+
+
+def _collect_architecture_signals(pkg: dict, corpus: str, stack: str) -> list[str]:
+    """Return the sorted names of the architecture signals that match.
+
+    The stack hint, the JSON-serialised ``pkg`` and ``corpus`` are joined with
+    newlines and lower-cased before each regex in ``_ARCH_CHECKS`` is searched.
+    """
+    haystack = "{}\n{}\n{}".format(stack, json.dumps(pkg), corpus).lower()
+    matched = [label for label, rx in _ARCH_CHECKS if rx.search(haystack)]
+    matched.sort()
+    return matched
+
+
+def _group_modules(files: list[dict]) -> list[dict]:
+    """Group inventory entries by their first path segment.
+
+    Files at the project root go into the ``"."`` module. Each module reports
+    its first 20 files, the sorted union of its files' roles and the total
+    file count. Modules are returned in name order.
+    """
+    members: dict[str, list] = {}
+    role_sets: dict[str, set] = {}
+    for entry in files:
+        head, slash, _rest = entry["path"].partition("/")
+        module = head if slash else "."
+        members.setdefault(module, []).append(entry["path"])
+        role_sets.setdefault(module, set()).update(entry["roles"])
+    return [
+        {
+            "name": module,
+            "files": members[module][:20],
+            "roles": sorted(role_sets[module]),
+            "file_count": len(members[module]),
+        }
+        for module in sorted(members)
+    ]
+
+
+def _detect_changed_files(files, previous_map, status_map, incremental) -> list[str]:
+    """Return the sorted paths of inventory files considered changed.
+
+    A file counts as changed when git reports a status other than ``"clean"``
+    for it. In incremental mode it also counts when it is missing from
+    ``previous_map`` or its hash or size differs from the previous entry.
+    Paths that are not in ``files`` are never returned.
+    """
+    known = {entry["path"]: entry for entry in previous_map.get("files", [])}
+    dirty = set()
+    for path, status in status_map.items():
+        if status != "clean":
+            dirty.add(path)
+    if incremental:
+        for entry in files:
+            prior = known.get(entry["path"])
+            unchanged = (
+                bool(prior)
+                and prior.get("file_hash") == entry["file_hash"]
+                and prior.get("size_bytes") == entry["size_bytes"]
+            )
+            if not unchanged:
+                dirty.add(entry["path"])
+    on_disk = {entry["path"] for entry in files}
+    return sorted(dirty & on_disk)
+
+
+def _load_previous_map(output_dir: str) -> dict:
+    """Load ``project-map.json`` from ``output_dir`` as a dict.
+
+    Returns ``{}`` when the file is missing, unreadable, not valid JSON, or
+    holds a top-level value that is not an object.
+    """
+    target = os.path.join(output_dir, "project-map.json")
+    try:
+        with open(target, "r", encoding="utf-8") as handle:
+            data = json.load(handle)
+    except (OSError, ValueError):
+        return {}
+    # A corrupted artifact holding e.g. a list would break ``.get("files")`` later.
+    return data if isinstance(data, dict) else {}
+
+
+def _build_file_inventory(cwd: str, pkg: dict, status_map: dict) -> list[dict]:
+    """Build the per-file inventory for the project at ``cwd``.
+
+    Each entry holds the project-relative path, language, size, last-modified
+    timestamp, content hash, git status (``"clean"`` when absent from
+    ``status_map``), roles, imports, exported symbols and an importance score.
+    Entries are returned sorted by path. Files that disappear between
+    discovery and inspection are left out.
+    """
+    inventory = []
+    for abs_path in _collect_text_files(cwd):
+        try:
+            stat = os.stat(abs_path)
+        except OSError:
+            # The file was removed or became unreadable after the directory
+            # walk (common while watching); skip it rather than abort the run.
+            continue
+        rel = _normalize_rel(os.path.relpath(abs_path, cwd))
+        text = _read_safe(abs_path)
+        language = _language_for(rel)
+        roles = _roles_for(rel, pkg)
+        imports = _parse_imports(text, language)
+        exports = _parse_symbols(text)
+        entry = {
+            "path": rel,
+            "language": language,
+            "size_bytes": stat.st_size,
+            "last_modified": _iso(datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc)),
+            "file_hash": _sha256(text),
+            "git_status": status_map.get(rel, "clean"),
+            "roles": roles,
+            "imports": imports,
+            "exports": exports,
+        }
+        entry["importance"] = _importance_for(entry)
+        inventory.append(entry)
+    return sorted(inventory, key=lambda e: e["path"])
+
+
+# Template placeholder such as <PRODUCT_NAME>.
+_RE_PLACEHOLDER = re.compile(r"<[A-Z][A-Z0-9_]+>")
+# (regex, change type) pairs tried in order; a type of None means the change
+# type is decided by the caller.
+_PRECEDENT_PATTERNS = [
+    (re.compile(r"\btest\s*\(|\bit\s*\(|\bdescribe\s*\(|\bdef\s+test_", re.IGNORECASE), "test"),
+    (re.compile(r"\bclass\s+[A-Z]|\bfunction\s+\w+|\bdef\s+\w+|\bfunc\s+\w+", re.IGNORECASE), None),
+    (re.compile(r"\btry\b|\bcatch\b|\bexcept\b|\bthrow\b", re.IGNORECASE), "error-handling"),
+    (re.compile(r"\brouter\.|\bapp\.get\b|\bapp\.post\b|@app\.", re.IGNORECASE), "route"),
+]
+
+
+def _extract_snippet(lines: list[str], line_index: int, radius: int = 2) -> str:
+    """Return up to ``radius`` lines either side of ``line_index``, joined by newlines.
+
+    The window is clipped to the list bounds and the text is truncated to
+    1200 characters.
+    """
+    first = line_index - radius
+    if first < 0:
+        first = 0
+    last = line_index + radius + 1
+    if last > len(lines):
+        last = len(lines)
+    window = lines[first:last]
+    return "\n".join(window)[:1200]
+
+
+def _build_precedent_items(cwd: str, files: list[dict]) -> list[dict]:
+    """Return up to 250 precedent items, at most one per inventory file.
+
+    For each file only the first line matched by any precedent pattern is
+    considered. If the snippet around that line contains a template
+    placeholder the file contributes nothing. Items are ordered by path, then
+    line number.
+    """
+    collected = []
+    for entry in files:
+        rel_path = entry["path"]
+        source_lines = _read_safe(os.path.join(cwd, rel_path)).split("\n")
+        default_type = "test" if "test" in entry["roles"] else "feature"
+        for index, text in enumerate(source_lines):
+            first_match = next(
+                (pair for pair in _PRECEDENT_PATTERNS if pair[0].search(text)),
+                None,
+            )
+            if first_match is None:
+                continue
+            change_type = first_match[1] if first_match[1] is not None else default_type
+            snippet = _extract_snippet(source_lines, index)
+            if not _RE_PLACEHOLDER.search(snippet):
+                candidates = [role for role in entry["roles"] if role]
+                if entry["language"]:
+                    candidates.append(entry["language"])
+                candidates.extend(_token_words(rel_path))
+                collected.append({
+                    "id": _sha256(f"{rel_path}:{index + 1}:{text}")[:16],
+                    "path": rel_path,
+                    "line": index + 1,
+                    "language": entry["language"],
+                    "change_type": change_type,
+                    "tags": list(dict.fromkeys(candidates))[:10],
+                    "summary": f"{change_type} precedent in {rel_path}",
+                    "snippet": snippet,
+                })
+            # Only the first matching line of a file is ever examined.
+            break
+    return sorted(collected, key=lambda item: (item["path"], item["line"]))[:250]
+
+
+def build_artifacts(cwd: str, meta: dict | None = None, incremental: bool = False,
+                    output_dir: str = ".simplicio") -> dict:
+    """Build the project map and precedent index for ``cwd`` without writing them.
+
+    Args:
+        cwd: Project root; the current directory is used when empty.
+        meta: Optional hints (``stack``, ``product_name``, ``project_mode``).
+        incremental: When true, files are also compared with the previous
+            ``project-map.json`` in ``output_dir`` to detect changes.
+        output_dir: Artifact directory, resolved against ``cwd``. Files inside
+            it are excluded from the inventory when it lies within the project.
+
+    Returns:
+        ``{"project_map": ..., "precedent_index": ...}``.
+    """
+    meta = meta or {}
+    abs_cwd = os.path.abspath(cwd or os.getcwd())
+    abs_out = os.path.abspath(os.path.join(abs_cwd, output_dir))
+    pkg = _parse_json_safe(os.path.join(abs_cwd, "package.json"))
+    status_map = _git_status_map(abs_cwd)
+    previous_map = _load_previous_map(abs_out)
+    files = _build_file_inventory(abs_cwd, pkg, status_map)
+
+    # Only the default ``.simplicio`` directory is skipped by the directory
+    # walk. With a custom output_dir inside the project, drop its files here
+    # so previously generated artifacts are not mapped as project sources.
+    try:
+        rel_out = _normalize_rel(os.path.relpath(abs_out, abs_cwd))
+    except ValueError:
+        # relpath raises when the two paths are on different Windows drives.
+        rel_out = ""
+    if rel_out and rel_out not in (".", "..") and not rel_out.startswith("../"):
+        out_prefix = rel_out + "/"
+        files = [f for f in files if not f["path"].startswith(out_prefix)]
+
+    # The signal corpus is the first 3000 characters of the first 80 files.
+    corpus = "\n".join(_read_safe(os.path.join(abs_cwd, f["path"]))[:3000] for f in files[:80])
+    changed_files = _detect_changed_files(files, previous_map, status_map, incremental)
+    stack = meta.get("stack") or pkg.get("type") or "unknown"
+    product_name = meta.get("product_name") or pkg.get("name") or os.path.basename(abs_cwd)
+    architecture_signals = _collect_architecture_signals(pkg, corpus, stack)
+    generated_at = _now_iso()
+
+    if os.path.exists(os.path.join(abs_cwd, "pnpm-lock.yaml")):
+        package_manager = "pnpm"
+    elif os.path.exists(os.path.join(abs_cwd, "yarn.lock")):
+        package_manager = "yarn"
+    else:
+        package_manager = "npm"
+
+    web_signal = "react" in architecture_signals or "nextjs" in architecture_signals
+    if meta.get("project_mode") == "monorepo":
+        system_type = "monorepo"
+    else:
+        system_type = "web" if web_signal else "library-or-service"
+
+    project_map = {
+        "schema": ARTIFACT_SCHEMA,
+        "version": ARTIFACT_VERSION,
+        "generated_at": generated_at,
+        "update_mode": "incremental" if incremental else "full",
+        "product": {
+            "name": product_name,
+            "stack": stack,
+            "project_mode": meta.get("project_mode", "root"),
+        },
+        "files": files,
+        "entry_points": [f["path"] for f in files if "entrypoint" in f["roles"]],
+        "test_files": [f["path"] for f in files if "test" in f["roles"]],
+        "config_files": [f["path"] for f in files if "config" in f["roles"]],
+        "modules": _group_modules(files),
+        "entities": _collect_entities(files),
+        "architecture": {
+            "signals": architecture_signals,
+            "system_type": system_type,
+        },
+        "dependencies": {
+            "package_manager": package_manager,
+            "manifest": "package.json" if pkg.get("name") else None,
+            "runtime": sorted((pkg.get("dependencies") or {}).keys()),
+            "dev": sorted((pkg.get("devDependencies") or {}).keys()),
+        },
+        "recent_changes": [
+            {"path": file, "status": status_map.get(file, "modified")} for file in changed_files
+        ],
+        "changed_files": changed_files,
+        "integration": {
+            "dev_cli_mapper": "read .simplicio/project-map.json, then use .simplicio/precedent-index.json for task-specific examples",
+            "contract": "SIMPLICIO_INTEGRATION.md",
+        },
+    }
+
+    precedent_index = {
+        "schema": PRECEDENT_SCHEMA,
+        "version": ARTIFACT_VERSION,
+        "generated_at": generated_at,
+        "source_project_map": ".simplicio/project-map.json",
+        "items": _build_precedent_items(abs_cwd, files),
+    }
+
+    return {"project_map": project_map, "precedent_index": precedent_index}
+
+
+def _write_json_stable(file: str, data: Any) -> None:
+    """Write ``data`` to ``file`` as 2-space-indented UTF-8 JSON with a trailing newline.
+
+    Missing parent directories are created. Non-ASCII characters are written
+    unescaped.
+    """
+    parent = os.path.dirname(file)
+    # A bare filename has no parent component, and os.makedirs("") raises.
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(file, "w", encoding="utf-8") as handle:
+        handle.write(json.dumps(data, indent=2, ensure_ascii=False) + "\n")
+
+
+def write_mapping_artifacts(cwd: str, meta: dict | None = None, incremental: bool = False,
+                            output_dir: str = ".simplicio",
+                            log: Callable[[str], None] | None = None) -> dict:
+    """Build both artifacts for ``cwd`` and write them under ``output_dir``.
+
+    ``log`` receives one summary line per written file; nothing is logged when
+    it is omitted. Returns the two artifact paths together with the artifact
+    contents.
+    """
+    if not log:
+        def log(_line):
+            return None
+
+    project_root = os.path.abspath(cwd or os.getcwd())
+    artifact_dir = os.path.abspath(os.path.join(project_root, output_dir))
+    built = build_artifacts(project_root, meta, incremental, output_dir)
+    project_map, precedent_index = built["project_map"], built["precedent_index"]
+
+    project_map_path = os.path.join(artifact_dir, "project-map.json")
+    precedent_path = os.path.join(artifact_dir, "precedent-index.json")
+    for destination, payload in (
+        (project_map_path, project_map),
+        (precedent_path, precedent_index),
+    ):
+        _write_json_stable(destination, payload)
+
+    file_count = len(project_map["files"])
+    changed_count = len(project_map["changed_files"])
+    precedent_count = len(precedent_index["items"])
+    log(f"-> wrote {os.path.relpath(project_map_path, project_root)} "
+        f"({file_count} files, {changed_count} changed)")
+    log(f"-> wrote {os.path.relpath(precedent_path, project_root)} "
+        f"({precedent_count} precedents)")
+    return {
+        "project_map_path": project_map_path,
+        "precedent_path": precedent_path,
+        "project_map": project_map,
+        "precedent_index": precedent_index,
+    }
diff --git a/tests/python/test_cli.py b/tests/python/test_cli.py
new file mode 100644
index 0000000..94903fe
--- /dev/null
+++ b/tests/python/test_cli.py
@@ -0,0 +1,138 @@
+"""Unit tests for the simplicio_mapper Python CLI and mapper.
+
+Pure stdlib (unittest). Run with: python3 -m unittest discover -s tests/python
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+# Repository root: two directory levels above this file's own directory.
+# It is put first on sys.path so the simplicio_mapper imports below can
+# resolve from the repository root.
+ROOT = Path(__file__).resolve().parents[2]
+sys.path.insert(0, str(ROOT))
+
+from simplicio_mapper import __version__ # noqa: E402
+from simplicio_mapper.cli import main # noqa: E402
+from simplicio_mapper.mapper import ( # noqa: E402
+ ARTIFACT_SCHEMA,
+ PRECEDENT_SCHEMA,
+ build_artifacts,
+ write_mapping_artifacts,
+)
+
+
+def _write(base: Path, rel: str, content: str) -> None:
+    """Store *content* as UTF-8 text at ``base / rel``, creating parent directories as needed."""
+    destination = base.joinpath(rel)
+    os.makedirs(destination.parent, exist_ok=True)
+    with destination.open("w", encoding="utf-8") as handle:
+        handle.write(content)
+
+
+class MapperArtifactsTest(unittest.TestCase):
+    """Exercise ``build_artifacts`` and ``write_mapping_artifacts`` on throwaway projects."""
+
+    def setUp(self) -> None:
+        # Each test gets its own scratch project directory.
+        self._tmp = tempfile.TemporaryDirectory()
+        self.dir = Path(self._tmp.name)
+
+    def tearDown(self) -> None:
+        self._tmp.cleanup()
+
+    def test_build_artifacts_emits_rich_project_map(self) -> None:
+        """A small Node project yields the expected project map and precedent entries."""
+        _write(self.dir, "package.json", json.dumps({
+            "name": "artifact-host",
+            "scripts": {"test": "node --test", "lint": "node scripts/lint.js"},
+            "dependencies": {"express": "^4.0.0"},
+        }))
+        _write(self.dir, "src/server.js",
+               "const express = require('express');\nfunction startServer() {}\nmodule.exports = { startServer };\n")
+        _write(self.dir, "tests/server.test.js",
+               "const { test } = require('node:test');\ntest('starts server', () => {});\n")
+
+        result = build_artifacts(
+            cwd=str(self.dir),
+            meta={"product_name": "Artifact Host", "stack": "node-express", "project_mode": "root"},
+        )
+        project_map = result["project_map"]
+        precedent_index = result["precedent_index"]
+
+        self.assertEqual(project_map["schema"], ARTIFACT_SCHEMA)
+        self.assertEqual(project_map["product"]["name"], "Artifact Host")
+        self.assertTrue(any(
+            f["path"] == "src/server.js" and f["language"] == "javascript"
+            for f in project_map["files"]
+        ))
+        self.assertIn("src/server.js", project_map["entry_points"])
+        self.assertIn("tests/server.test.js", project_map["test_files"])
+        self.assertIn("express", project_map["architecture"]["signals"])
+        self.assertTrue(any(e["name"] == "server" for e in project_map["entities"]))
+        self.assertEqual(precedent_index["schema"], PRECEDENT_SCHEMA)
+        self.assertTrue(any(
+            item["path"] == "tests/server.test.js" and item["change_type"] == "test"
+            for item in precedent_index["items"]
+        ))
+
+    def test_write_mapping_artifacts_persists_files(self) -> None:
+        """Both artifact files exist on disk and a first run is recorded as ``full``."""
+        _write(self.dir, "package.json", json.dumps({"name": "write-host"}))
+        _write(self.dir, "src/index.js", "export function run() { return 1; }\n")
+
+        out = write_mapping_artifacts(cwd=str(self.dir), meta={"stack": "node"})
+        self.assertTrue(os.path.exists(out["project_map_path"]))
+        self.assertTrue(os.path.exists(out["precedent_path"]))
+
+        # Decode explicitly as UTF-8 rather than relying on the locale
+        # default, which is not UTF-8 on every platform.
+        on_disk = json.loads(Path(out["project_map_path"]).read_text(encoding="utf-8"))
+        self.assertEqual(on_disk["update_mode"], "full")
+
+    def test_incremental_records_changed_files(self) -> None:
+        """An incremental run after editing a file lists that file as changed."""
+        _write(self.dir, "package.json", json.dumps({"name": "incremental-host"}))
+        _write(self.dir, "src/index.js", "export function run() { return 1; }\n")
+        write_mapping_artifacts(cwd=str(self.dir), meta={"stack": "node"})
+
+        # Modify the source between the two runs.
+        _write(self.dir, "src/index.js", "export function run() { return 2; }\n")
+        result = write_mapping_artifacts(cwd=str(self.dir), meta={"stack": "node"}, incremental=True)
+
+        project_map = result["project_map"]
+        self.assertEqual(project_map["update_mode"], "incremental")
+        self.assertIn("src/index.js", project_map["changed_files"])
+
+
+class CliTest(unittest.TestCase):
+    """Drive the CLI entry point ``main`` with explicit argument lists."""
+
+    def setUp(self) -> None:
+        # Each test gets its own scratch project directory.
+        self._tmp = tempfile.TemporaryDirectory()
+        self.dir = Path(self._tmp.name)
+
+    def tearDown(self) -> None:
+        self._tmp.cleanup()
+
+    def test_main_map_writes_artifacts(self) -> None:
+        """``map`` returns 0 and writes a project map carrying the CLI-supplied metadata."""
+        _write(self.dir, "package.json", json.dumps({"name": "cli-host"}))
+        _write(self.dir, "src/index.js", "export function run() {}\n")
+
+        code = main(["map", "--root", str(self.dir), "--stack", "node",
+                     "--product-name", "CLI Host", "--silent"])
+        self.assertEqual(code, 0)
+
+        # Decode explicitly as UTF-8 rather than relying on the locale
+        # default, which is not UTF-8 on every platform.
+        project_map = json.loads(
+            (self.dir / ".simplicio" / "project-map.json").read_text(encoding="utf-8")
+        )
+        self.assertEqual(project_map["product"]["name"], "CLI Host")
+        self.assertEqual(project_map["product"]["stack"], "node")
+
+    def test_unknown_option_exits_with_code_2(self) -> None:
+        """An unrecognized flag raises ``SystemExit`` with code 2."""
+        with self.assertRaises(SystemExit) as ctx:
+            main(["map", "--bogus"])
+        self.assertEqual(ctx.exception.code, 2)
+
+    def test_help_exits_zero(self) -> None:
+        """``--help`` raises ``SystemExit`` with code 0."""
+        with self.assertRaises(SystemExit) as ctx:
+            main(["--help"])
+        self.assertEqual(ctx.exception.code, 0)
+
+    def test_version_matches_package(self) -> None:
+        """``--version`` raises ``SystemExit`` with code 0 and the package version is set."""
+        with self.assertRaises(SystemExit) as ctx:
+            main(["--version"])
+        self.assertEqual(ctx.exception.code, 0)
+        # NOTE(review): this only checks that __version__ is non-empty; the
+        # text printed by --version is never compared with it. Consider
+        # capturing the output once the CLI's output stream is confirmed.
+        self.assertTrue(__version__)
+
+
+# Allow running this file directly, in addition to unittest discovery.
+if __name__ == "__main__":
+    unittest.main()