diff --git a/drift v3/crates/drift-engine/src/main.rs b/drift v3/crates/drift-engine/src/main.rs index 6f9d49a6..33bddf9e 100644 --- a/drift v3/crates/drift-engine/src/main.rs +++ b/drift v3/crates/drift-engine/src/main.rs @@ -34,6 +34,12 @@ struct ReuseIndex { snapshots_by_file: BTreeMap, } +#[derive(Default)] +struct FileDiscoveryResult { + files: Vec, + diagnostics: Vec, +} + fn main() { if let Err(error) = run() { eprintln!("{error}"); @@ -141,16 +147,16 @@ fn scan_repo( reuse_manifest_path: Option<&Path>, ) -> Result> { let started = Instant::now(); - let mut files = Vec::new(); let ignore = IgnoreMatcher::from_repo(repo_root); - collect_indexable_files(repo_root, repo_root, &mut files, &ignore)?; + let discovery = collect_indexable_files(repo_root, &ignore)?; + let mut files = discovery.files; files.sort(); let mut resolver = build_resolver_context(repo_root, &files); let reuse_index = load_reuse_index(reuse_manifest_path)?; let mut scanned_files = Vec::new(); let mut facts = Vec::new(); - let mut diagnostics = Vec::new(); + let mut diagnostics = discovery.diagnostics; let mut graph_node_count = 0_usize; let mut graph_edge_count = 0_usize; let scanned = scan_files(repo_root, &files, &mut diagnostics, reuse_index.as_ref())?; @@ -212,9 +218,9 @@ fn stream_scan_repo( }, )?; - let mut files = Vec::new(); let ignore = IgnoreMatcher::from_repo(repo_root); - collect_indexable_files(repo_root, repo_root, &mut files, &ignore)?; + let discovery = collect_indexable_files(repo_root, &ignore)?; + let mut files = discovery.files; files.sort(); let mut resolver = build_resolver_context(repo_root, &files); let reuse_index = load_reuse_index(reuse_manifest_path)?; @@ -225,7 +231,7 @@ fn stream_scan_repo( let mut graph_nodes_emitted = 0_usize; let mut graph_edges_emitted = 0_usize; let mut diagnostics_emitted = 0_usize; - let mut scan_diagnostics = Vec::new(); + let mut scan_diagnostics = discovery.diagnostics; let mut scanned = scan_files( repo_root, &files, @@ -569,18 +575,24 @@ fn reused_file(file: &ScannedFile, reuse: Option<&ReuseIndex>) -> bool { } fn collect_indexable_files( + repo_root: &Path, + ignore: &IgnoreMatcher, +) -> io::Result { + let mut result = FileDiscoveryResult::default(); + collect_indexable_files_in_dir(repo_root, repo_root, &mut result, ignore)?; + Ok(result) +} + +fn collect_indexable_files_in_dir( repo_root: &Path, dir: &Path, - files: &mut Vec, + result: &mut FileDiscoveryResult, ignore: &IgnoreMatcher, ) -> io::Result<()> { for entry in fs::read_dir(dir)? { let entry = entry?; let path = entry.path(); let file_type = entry.file_type()?; - if file_type.is_symlink() { - continue; - } let relative = path.strip_prefix(repo_root).unwrap_or(&path); if ignore.is_ignored(relative) { continue; @@ -588,11 +600,31 @@ fn collect_indexable_files( if !should_index_path(relative) { continue; } + if file_type.is_symlink() { + if let Err(error) = fs::metadata(&path) { + let code = if error.kind() == io::ErrorKind::NotFound { + "broken_symlink" + } else { + "symlink_target_unreadable" + }; + result.diagnostics.push(EngineDiagnostic { + severity: "warning".to_string(), + code: code.to_string(), + message: format!( + "Skipped symlink {} because its target could not be read: {}", + normalize_path(relative), + error + ), + file_path: Some(normalize_path(relative)), + }); + } + continue; + } if file_type.is_dir() { - collect_indexable_files(repo_root, &path, files, ignore)?; + collect_indexable_files_in_dir(repo_root, &path, result, ignore)?; } else if file_type.is_file() && is_typescript_path(&path) { - files.push(relative.to_path_buf()); + result.files.push(relative.to_path_buf()); } } Ok(()) diff --git a/drift v3/crates/drift-engine/tests/stream_graph.rs b/drift v3/crates/drift-engine/tests/stream_graph.rs index 5888c452..f45e0f91 100644 --- a/drift v3/crates/drift-engine/tests/stream_graph.rs +++ b/drift v3/crates/drift-engine/tests/stream_graph.rs @@ -177,6 +177,72 @@ export async function GET() { assert!(completed["stats"]["graph_edges"].as_u64().unwrap() > 0); } +#[cfg(unix)] +#[test] +fn scan_stream_reports_broken_symlinks_without_failing() { + let dir = tempfile::tempdir().expect("tempdir"); + let route = dir.path().join("app/api/users"); + fs::create_dir_all(&route).expect("create route dir"); + fs::write( + route.join("route.ts"), + r#"export async function GET() { + return Response.json({}); +} +"#, + ) + .expect("write route"); + let ee_dir = dir + .path() + .join("apps/web/app/app.dub.co/(dashboard)/[slug]/(ee)"); + fs::create_dir_all(&ee_dir).expect("create ee dir"); + std::os::unix::fs::symlink("../../../../(ee)/LICENSE.md", ee_dir.join("LICENSE.md")) + .expect("create broken symlink"); + + let output = Command::new(env!("CARGO_BIN_EXE_drift-engine")) + .args([ + "scan-repo", + dir.path().to_str().expect("utf8 temp dir"), + "--format", + "jsonl", + "--repo-id", + "repo_abc", + "--scan-id", + "scan_abc", + ]) + .output() + .expect("run drift-engine"); + assert!( + output.status.success(), + "engine failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let events = String::from_utf8(output.stdout) + .expect("utf8 stdout") + .lines() + .map(|line| serde_json::from_str::(line).expect("json line")) + .collect::>(); + let diagnostics = events + .iter() + .filter(|event| event["event"] == "diagnostic_batch") + .flat_map(|event| event["diagnostics"].as_array().expect("diagnostics").iter()) + .collect::>(); + + assert!( + diagnostics.iter().any(|diagnostic| { + diagnostic["code"] == "broken_symlink" + && diagnostic["file_path"] + == "apps/web/app/app.dub.co/(dashboard)/[slug]/(ee)/LICENSE.md" + }), + "missing broken symlink diagnostic: {diagnostics:#?}" + ); + let completed = events + .iter() + .find(|event| event["event"] == "scan_completed") + .expect("scan_completed event"); + assert_eq!(completed["stats"]["files_skipped"].as_u64().unwrap(), 1); +} + #[test] fn scan_stream_resolves_alias_workspace_index_imports_and_reports_unresolved_imports() { let dir = tempfile::tempdir().expect("tempdir"); diff --git a/drift v3/packages/cli/src/domain/repo-paths.ts b/drift v3/packages/cli/src/domain/repo-paths.ts index 7d6fb4d1..5d9eacb6 100644 --- a/drift v3/packages/cli/src/domain/repo-paths.ts +++ b/drift v3/packages/cli/src/domain/repo-paths.ts @@ -2,7 +2,7 @@ import { DRIFT_CONTRACT_SCHEMA_VERSION,type ConventionCandidate,type RepoContrac import type { SqliteDriftStorage } from "@drift/storage"; import { execFileSync } from "node:child_process"; import { createHash } from "node:crypto"; -import { existsSync,mkdirSync,readdirSync,readFileSync,statSync } from "node:fs"; +import { existsSync,lstatSync,mkdirSync,readdirSync,readFileSync,statSync } from "node:fs"; import { dirname,join,relative } from "node:path"; import { hashStable,repoIdForRoot } from "./identifiers.js"; @@ -180,7 +180,15 @@ function collectResolverInputs(repoRoot: string, current: string, results: strin continue; } const absolutePath = join(current, entry); - const stats = statSync(absolutePath); + let stats; + try { + stats = lstatSync(absolutePath); + } catch { + continue; + } + if (stats.isSymbolicLink()) { + continue; + } if (stats.isDirectory()) { collectResolverInputs(repoRoot, absolutePath, results); continue; diff --git a/drift v3/packages/cli/src/domain/scan-status.ts b/drift v3/packages/cli/src/domain/scan-status.ts index 3789dad8..a0300bd3 100644 --- a/drift v3/packages/cli/src/domain/scan-status.ts +++ b/drift v3/packages/cli/src/domain/scan-status.ts @@ -841,6 +841,8 @@ function parserGapKindForDiagnostic(code: string): ParserGapKind | null { case "unsupported_namespace_import_symbol": return "unsupported_framework_pattern"; case "typescript_fallback_used": + case "broken_symlink": + case "symlink_target_unreadable": case "file_too_large": case "unsupported_dynamic_middleware_matcher": return "partial_parse"; diff --git a/drift v3/packages/cli/test/cli.test.ts b/drift v3/packages/cli/test/cli.test.ts index 7f68e762..222f5cb7 100644 --- a/drift v3/packages/cli/test/cli.test.ts +++ b/drift v3/packages/cli/test/cli.test.ts @@ -1,4 +1,4 @@ -import { mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from "node:fs/promises"; +import { mkdir, mkdtemp, readFile, readdir, rm, stat, symlink, writeFile } from "node:fs/promises"; import { createHash } from "node:crypto"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -2149,6 +2149,53 @@ describe("drift CLI convention review", () => { expect(result.stdout).toContain("drift check --diff main...HEAD"); }); + it.skipIf(process.platform === "win32")("starts when an upstream repo contains a broken symlink", async () => { + const dir = await mkdtemp(join(tmpdir(), "drift-start-broken-symlink-")); + tempDirs.push(dir); + const repoRoot = join(dir, "repo"); + const stateRoot = join(dir, "state"); + await mkdir(join(repoRoot, "apps/web/app/api/users"), { recursive: true }); + await writeFile( + join(repoRoot, "apps/web/app/api/users/route.ts"), + [ + "export async function GET() {", + " return Response.json({ ok: true });", + "}", + "" + ].join("\n") + ); + const eeDir = join(repoRoot, "apps/web/app/app.dub.co/(dashboard)/[slug]/(ee)"); + await mkdir(eeDir, { recursive: true }); + await symlink("../../../../(ee)/LICENSE.md", join(eeDir, "LICENSE.md")); + + const result = await runCli([ + "start", + "--repo-root", repoRoot, + "--state-root", stateRoot, + "--now", "2026-05-10T00:00:25.000Z", + "--json" + ]); + + expect(result.exitCode).toBe(0); + const payload = JSON.parse(result.stdout); + expect(payload.summary).toMatchObject({ + files_indexed: 1, + files_skipped: 1, + diagnostics_count: 1, + engine_source: "rust" + }); + const storage = openDriftStorage({ databasePath: payload.state.database_path }); + storage.migrate(); + expect(storage.listParserGaps(payload.repo.id, payload.scan.id)).toMatchObject([ + { + kind: "partial_parse", + file_path: "apps/web/app/app.dub.co/(dashboard)/[slug]/(ee)/LICENSE.md", + confidence_impact: "blocks_enforcement" + } + ]); + storage.close(); + }); + it("starts onboarding with accept-defaults, materializes contract, and baselines existing violations", async () => { const dir = await mkdtemp(join(tmpdir(), "drift-start-defaults-")); tempDirs.push(dir);