From 17f46ef7b88f8dd9dca13acedb0a218131f8ac13 Mon Sep 17 00:00:00 2001 From: Dmitry Kireev Date: Wed, 8 Apr 2026 12:24:24 +0000 Subject: [PATCH 1/4] Add ollama local provider support to codex command - feat(codex): add --ollama flag and use_ollama config to enable local ollama provider with --oss and CODEX_OSS_BASE_URL env - fix(codex): strip --ollama flag from forwarded args and conditionally add --oss and --local-provider ollama flags - test(codex): add tests to verify --ollama flag, config use_ollama, and correct flags/env injection for codex command - chore(.gitignore): add .gocache and .gomodcache entries - chore(nixhome): remove stale oss_provider = "lms" from ~/.codex/config.toml during nixhome codex setup - fix(cmd/agent_test.go): update TestCodex_DefaultFlags to expect no --oss flag without ollama use --- .gitignore | 2 + cmd/agent_test.go | 12 ++- cmd/codex.go | 60 ++++++++++- cmd/codex_test.go | 140 ++++++++++++++++++++++++++ nixhome/modules/base.nix | 52 +++++----- nixhome/modules/fragments/30-codex.sh | 7 ++ 6 files changed, 241 insertions(+), 32 deletions(-) create mode 100644 cmd/codex_test.go diff --git a/.gitignore b/.gitignore index b33dbf2..5198e7d 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ test/testdata/**/nixhome test/results .devcell .devcell.toml +.gocache +.gomodcache diff --git a/cmd/agent_test.go b/cmd/agent_test.go index 0c5b731..f8bdfc9 100644 --- a/cmd/agent_test.go +++ b/cmd/agent_test.go @@ -77,15 +77,17 @@ func TestClaude_WithUserArgs(t *testing.T) { // --- codex --- func TestCodex_DefaultFlags(t *testing.T) { - argv := buildTestArgv("codex", []string{"--dangerously-bypass-approvals-and-sandbox", "--oss", "-p", "lms"}, nil) + // No ollama: only --dangerously-bypass-approvals-and-sandbox; no --oss. 
+ argv := buildTestArgv("codex", []string{"--dangerously-bypass-approvals-and-sandbox"}, nil) tail := trailingAfterImage(argv) if tail[0] != "codex" { t.Errorf("expected codex binary, got: %v", tail) } - for _, flag := range []string{"--dangerously-bypass-approvals-and-sandbox", "--oss", "-p", "lms"} { - if !hasArg(tail, flag) { - t.Errorf("missing flag %q in tail: %v", flag, tail) - } + if !hasArg(tail, "--dangerously-bypass-approvals-and-sandbox") { + t.Errorf("missing --dangerously-bypass-approvals-and-sandbox in tail: %v", tail) + } + if hasArg(tail, "--oss") { + t.Errorf("unexpected --oss without ollama in tail: %v", tail) } } diff --git a/cmd/codex.go b/cmd/codex.go index 6dfc8d4..96d901a 100644 --- a/cmd/codex.go +++ b/cmd/codex.go @@ -1,6 +1,13 @@ package main -import "github.com/spf13/cobra" +import ( + "fmt" + "os" + + "github.com/DimmKirr/devcell/internal/cfg" + "github.com/DimmKirr/devcell/internal/config" + "github.com/spf13/cobra" +) var codexCmd = &cobra.Command{ Use: "codex [args...]", @@ -10,15 +17,25 @@ var codexCmd = &cobra.Command{ The current working directory is mounted as /workspace. All additional args are forwarded to the codex binary unchanged. +When use_ollama = true in the [llm] section of devcell.toml (or --ollama +is passed), Codex is started with --oss --local-provider ollama and +CODEX_OSS_BASE_URL pointing at the host ollama instance. The model from +llm.models.default is also passed when set. + +Without ollama configured, Codex runs normally against the cloud provider +(requires OPENAI_API_KEY or equivalent). 
+ Examples: cell codex - cell codex --model o4-mini`, + cell codex --ollama + cell codex --model o3`, DisableFlagParsing: true, RunE: func(cmd *cobra.Command, args []string) error { + extraFlags, extraEnv := codexOllamaConfig() return runAgent("codex", - []string{"--dangerously-bypass-approvals-and-sandbox", "--oss", "-p", "lms"}, - args, nil) + append([]string{"--dangerously-bypass-approvals-and-sandbox"}, extraFlags...), + args, extraEnv) }, } @@ -41,3 +58,38 @@ Examples: func init() { codexCmd.AddCommand(codexResumeCmd) } + +// codexOllamaConfig returns extra CLI flags and env vars when ollama mode is +// active (use_ollama=true in devcell.toml, or --ollama flag). +// Returns nil, nil when ollama is not configured — Codex runs normally. +func codexOllamaConfig() (flags []string, env map[string]string) { + dbg := scanFlag("--debug") + useOllama := scanFlag("--ollama") + + var model string + if !useOllama { + c, err := config.LoadFromOS() + if err == nil { + cellCfg := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) + useOllama = cellCfg.LLM.UseOllama + model = cellCfg.LLM.Models.Default + } + } + + if !useOllama { + return nil, nil + } + + if dbg { + fmt.Fprintf(os.Stderr, " codex: ollama mode enabled\n") + } + + flags = []string{"--oss", "--local-provider", "ollama"} + if model != "" { + flags = append(flags, "--model", model) + } + + return flags, map[string]string{ + "CODEX_OSS_BASE_URL": "http://host.docker.internal:11434/v1", + } +} diff --git a/cmd/codex_test.go b/cmd/codex_test.go new file mode 100644 index 0000000..c225479 --- /dev/null +++ b/cmd/codex_test.go @@ -0,0 +1,140 @@ +package main_test + +import ( + "os" + "os/exec" + "path/filepath" + "strings" + "testing" +) + +// TestCodex_OllamaFlag_InjectsFlags verifies that "cell codex --ollama --dry-run" +// passes --oss --local-provider ollama and CODEX_OSS_BASE_URL. 
+func TestCodex_OllamaFlag_InjectsFlags(t *testing.T) { + home := scaffoldedHome(t) + + cmd := exec.Command(binaryPath, "codex", "--ollama", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("codex --ollama --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "CODEX_OSS_BASE_URL=http://host.docker.internal:11434/v1") { + t.Errorf("expected CODEX_OSS_BASE_URL in argv:\n%s", argv) + } + if !strings.Contains(argv, "--oss") { + t.Errorf("expected --oss in argv:\n%s", argv) + } + if !strings.Contains(argv, "--local-provider ollama") { + t.Errorf("expected --local-provider ollama in argv:\n%s", argv) + } +} + +// TestCodex_OllamaFlag_Stripped verifies --ollama is not forwarded to codex. +func TestCodex_OllamaFlag_Stripped(t *testing.T) { + home := scaffoldedHome(t) + + cmd := exec.Command(binaryPath, "codex", "--ollama", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("codex --ollama --dry-run failed: %v\noutput: %s", err, out) + } + + argv := strings.TrimSpace(string(out)) + for _, p := range strings.Fields(argv) { + if p == "--ollama" { + t.Errorf("--ollama should be stripped from argv, but found it:\n%s", argv) + } + } +} + +// TestCodex_NoOllama_NoOSSFlags verifies that without ollama config, +// codex is started without --oss (uses cloud provider). 
+func TestCodex_NoOllama_NoOSSFlags(t *testing.T) { + home := scaffoldedHome(t) + + cmd := exec.Command(binaryPath, "codex", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("codex --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if strings.Contains(argv, " --oss") { + t.Errorf("--oss should not be passed without ollama mode:\n%s", argv) + } + if strings.Contains(argv, "--local-provider") { + t.Errorf("--local-provider should not be passed without ollama mode:\n%s", argv) + } +} + +// TestCodex_ConfigUseOllama_InjectsFlags verifies that [llm] use_ollama=true +// in devcell.toml enables --oss --local-provider ollama. +func TestCodex_ConfigUseOllama_InjectsFlags(t *testing.T) { + home := scaffoldedHome(t) + + cfgDir := filepath.Join(home, ".config", "devcell") + tomlContent := `[cell] +[llm] +use_ollama = true +` + if err := os.WriteFile(filepath.Join(cfgDir, "devcell.toml"), []byte(tomlContent), 0644); err != nil { + t.Fatal(err) + } + + cmd := exec.Command(binaryPath, "codex", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("codex --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "CODEX_OSS_BASE_URL=http://host.docker.internal:11434/v1") { + t.Errorf("expected CODEX_OSS_BASE_URL from config:\n%s", argv) + } + if !strings.Contains(argv, "--oss") { + t.Errorf("expected --oss from config:\n%s", argv) + } + if !strings.Contains(argv, "--local-provider ollama") { + t.Errorf("expected --local-provider ollama from config:\n%s", argv) + } +} + +// TestCodex_ConfigUseOllama_WithModel verifies that llm.models.default is +// passed as --model when ollama is enabled. 
+func TestCodex_ConfigUseOllama_WithModel(t *testing.T) { + home := scaffoldedHome(t) + + cfgDir := filepath.Join(home, ".config", "devcell") + tomlContent := `[cell] +[llm] +use_ollama = true +[llm.models] +default = "qwen2.5-coder:32b" +` + if err := os.WriteFile(filepath.Join(cfgDir, "devcell.toml"), []byte(tomlContent), 0644); err != nil { + t.Fatal(err) + } + + cmd := exec.Command(binaryPath, "codex", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("codex --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "--model qwen2.5-coder:32b") { + t.Errorf("expected --model qwen2.5-coder:32b in argv:\n%s", argv) + } +} diff --git a/nixhome/modules/base.nix b/nixhome/modules/base.nix index 2085e2b..e6ca9d5 100644 --- a/nixhome/modules/base.nix +++ b/nixhome/modules/base.nix @@ -8,7 +8,9 @@ # ── Locale support ────────────────────────────────────────────────────────── # Container needs en_US.UTF-8 locale for consistent browser fingerprinting # and correct text handling. LOCALE_ARCHIVE tells glibc where to find locales. - home.sessionVariables.LOCALE_ARCHIVE = "${pkgs.glibcLocales}/lib/locale/locale-archive"; + home.sessionVariables = lib.mkIf pkgs.stdenv.isLinux { + LOCALE_ARCHIVE = "${pkgs.glibcLocales}/lib/locale/locale-archive"; + }; # ── Stage entrypoint fragments to /etc/devcell/entrypoint.d/ ─────────────── # Any module can drop a fragment into ~/.config/devcell/entrypoint.d/ via home.file. @@ -49,31 +51,32 @@ fi ''; - home.file = { - # ── Entrypoint fragments ───────────────────────────────────────────────── - # Standalone shell scripts sourced by entrypoint.sh at container start. - # See fragments/ directory for the actual shell code. - # Locale — must run before any other fragment so bash doesn't warn. 
- ".config/devcell/entrypoint.d/01-locale.sh" = { - executable = true; - text = '' - #!/bin/sh - export LOCALE_ARCHIVE="${pkgs.glibcLocales}/lib/locale/locale-archive" - ''; - }; - ".config/devcell/entrypoint.d/05-shell-rc.sh" = { - executable = true; - source = ./fragments/05-shell-rc.sh; - }; - ".config/devcell/entrypoint.d/20-homedir.sh" = { - executable = true; - source = ./fragments/20-homedir.sh; + home.file = + { + # ── Entrypoint fragments ─────────────────────────────────────────────── + # Standalone shell scripts sourced by entrypoint.sh at container start. + # See fragments/ directory for the actual shell code. + ".config/devcell/entrypoint.d/05-shell-rc.sh" = { + executable = true; + source = ./fragments/05-shell-rc.sh; + }; + ".config/devcell/entrypoint.d/20-homedir.sh" = { + executable = true; + source = ./fragments/20-homedir.sh; + }; + } + // lib.optionalAttrs pkgs.stdenv.isLinux { + # Locale — must run before any other fragment so bash doesn't warn. + ".config/devcell/entrypoint.d/01-locale.sh" = { + executable = true; + text = '' + #!/bin/sh + export LOCALE_ARCHIVE="${pkgs.glibcLocales}/lib/locale/locale-archive" + ''; + }; }; - }; home.packages = with pkgs; [ - glibcLocales # en_US.UTF-8 locale for browser fingerprinting + text handling - # fonts — monospace with good Unicode block element coverage cascadia-code # Microsoft terminal font; seamless block elements fira-code # popular terminal font; decent block elements @@ -103,5 +106,8 @@ wget # HTTP downloader rsync # fast file sync (used by entrypoint fragment staging) yq-go # TOML/YAML/JSON processor + ] ++ lib.optionals pkgs.stdenv.isLinux [ + glibcLocales # en_US.UTF-8 locale for browser fingerprinting + text handling + bubblewrap # unprivileged sandboxing tool used by Linux-only tooling ]; } diff --git a/nixhome/modules/fragments/30-codex.sh b/nixhome/modules/fragments/30-codex.sh index cd58160..07a8429 100755 --- a/nixhome/modules/fragments/30-codex.sh +++ 
b/nixhome/modules/fragments/30-codex.sh @@ -127,5 +127,12 @@ if [ -d "$DEVCELL_HOME/.codex/skills" ] && [ -n "$(ls -A "$DEVCELL_HOME/.codex/s log "✓ Codex skills synced from nix" fi +# ── Migrate: remove stale oss_provider = "lms" (set by old -p lms flag) ── +config_toml="$HOME/.codex/config.toml" +if [ -f "$config_toml" ] && grep -q '^oss_provider = "lms"' "$config_toml"; then + sed -i '/^oss_provider = "lms"/d' "$config_toml" + log "✓ Removed stale oss_provider = \"lms\" from ~/.codex/config.toml" +fi + merge_codex_mcp "$HOME/.codex/config.toml" [ -d "$HOME/.codex" ] && chown -R "$HOST_USER" "$HOME/.codex" From f570be072abb27870f9163d61fbd5f8bddabcbc5 Mon Sep 17 00:00:00 2001 From: Dmitry Kireev Date: Fri, 10 Apr 2026 13:24:18 +0000 Subject: [PATCH 2/4] Add output format option with JSON/YAML support and unify listings - feat(root): add --format flag to set output format (text, yaml, json) - refactor(models): extract renderModels function with JSON/YAML output and suppress prose in non-text modes - refactor(models): add ModelEntry struct and print typed JSON/YAML data for model listings - chore(models): remove lipgloss dependencies and replace manual table rendering with ux.PrintTable - refactor(rdp): extract renderRDPList with multi-format output support using ux.PrintTable - refactor(vnc): extract renderVNCList with multi-format output support using ux.PrintTable - chore(ux): add OutputFormat variable and PrintTable/PrintData helpers for text, json, yaml output - test(models_format): add tests for renderModels JSON and text output correctness and prose suppression - test(rdp_format): add tests for renderRDPList JSON and text output correctness, empty case, URL inclusion - test(vnc_format): add tests for renderVNCList JSON and text output correctness, empty case, URL inclusion - test(ux_format): add extensive tests for ux.PrintTable and ux.PrintData in all output formats - chore(strip): extend stripCellFlags to remove --format flag in both space and equals forms 
- test(strip): add tests verifying --format flag removal and OutputFormat application from args --- cmd/models.go | 155 +++++++++++------ cmd/models_format_test.go | 113 ++++++++++++ cmd/rdp.go | 22 ++- cmd/rdp_format_test.go | 109 ++++++++++++ cmd/root.go | 5 + cmd/strip_test.go | 55 ++++++ cmd/vnc.go | 22 ++- cmd/vnc_format_test.go | 89 ++++++++++ internal/ux/format.go | 79 +++++++++ internal/ux/format_test.go | 210 +++++++++++++++++++++++ nixhome/modules/fragments/30-claude.sh | 15 ++ nixhome/modules/fragments/40-postgres.sh | 10 +- nixhome/modules/scraping/default.nix | 1 + 13 files changed, 810 insertions(+), 75 deletions(-) create mode 100644 cmd/models_format_test.go create mode 100644 cmd/rdp_format_test.go create mode 100644 cmd/vnc_format_test.go create mode 100644 internal/ux/format.go create mode 100644 internal/ux/format_test.go diff --git a/cmd/models.go b/cmd/models.go index 220c81e..07f1c6f 100644 --- a/cmd/models.go +++ b/cmd/models.go @@ -8,11 +8,19 @@ import ( "github.com/DimmKirr/devcell/internal/ollama" "github.com/DimmKirr/devcell/internal/ux" - "github.com/charmbracelet/lipgloss" - "github.com/charmbracelet/lipgloss/table" "github.com/spf13/cobra" ) +// ModelEntry is the typed representation of a ranked model for JSON/YAML output. +type ModelEntry struct { + Rank int `json:"rank" yaml:"rank"` + Name string `json:"name" yaml:"name"` + SWEScore float64 `json:"swe_score" yaml:"swe_score"` + Size string `json:"size" yaml:"size"` + Type string `json:"type" yaml:"type"` + Hardware string `json:"hardware" yaml:"hardware"` +} + // Reuse shared styles from ux package. var ( modGray = ux.StyleMuted @@ -135,27 +143,39 @@ Examples: log.Debug(fmt.Sprintf("System RAM: %.1f GB", systemRAM)) } - fmt.Println() - fmt.Println(modBold.Render(" Local Models (ranked by SWE-Bench score)")) - fmt.Println() + renderModels(ranked, hfInfoMap, systemRAM) - // Build table rows. 
- rows := make([][]string, 0, len(ranked)) - for _, r := range ranked { - score := modGray.Render("-") - if r.SWEScore > 0 { - label := fmt.Sprintf("~%.0f%%", r.SWEScore) - if r.ScoreSource != "" { - label += " " + modGray.Render(r.ScoreSource) - } - score = label + if ux.OutputFormat == "text" { + fmt.Println(modGray.Render(fmt.Sprintf("%*s", 70, fmt.Sprintf("ollama %s", baseURL)))) + fmt.Println() + if sweErr != nil { + ux.Info("Scores from built-in estimates (SWE-bench fetch failed).") + } else { + ux.Info("Scores from SWE-bench Verified (full-model, not quantized).") } - size := r.ParameterSize - if size == "" { - size = "-" + ux.Info(fmt.Sprintf("Hardware: Q4 estimate vs %.0fGB RAM. --debug for details.", systemRAM)) + fmt.Println() + + snippet := ollama.FormatTOMLSnippet(ranked) + ux.Info(fmt.Sprintf("%d models found. Add to ~/.config/devcell/devcell.toml:", len(ranked))) + fmt.Println() + for _, line := range strings.Split(snippet, "\n") { + fmt.Printf(" %s\n", line) } + fmt.Println() + } + + return nil + }, +} - // Task type from HuggingFace. +// renderModels displays the ranked model list in the current OutputFormat. +// In json/yaml mode, prose (header, TOML snippet, footer) is suppressed. +// Extracted for testability without a live ollama daemon. +func renderModels(ranked []ollama.RankedModel, hfInfoMap map[string]ollama.HFModelInfo, systemRAM float64) { + if ux.OutputFormat != "text" { + entries := make([]ModelEntry, 0, len(ranked)) + for _, r := range ranked { family := ollama.ModelFamily(r.Name) taskLabel := "General" if info, ok := hfInfoMap[family]; ok { @@ -163,57 +183,82 @@ Examples: } else { taskLabel = ollama.InferTaskLabel(ollama.HFModelInfo{}, r.Name) } - - // Hardware check. 
- hwLabel := modGray.Render("-") + hw := "" if systemRAM > 0 { ok, needed := ollama.CheckHardware(r.ParameterSize, systemRAM) if needed > 0 { if ok { - hwLabel = modGreen.Render(fmt.Sprintf("OK (%.0fGB)", needed)) + hw = fmt.Sprintf("OK (%.0fGB)", needed) } else { - hwLabel = modRed.Render(fmt.Sprintf("%.0fGB needed", needed)) + hw = fmt.Sprintf("%.0fGB needed", needed) } } } - - rows = append(rows, []string{ - fmt.Sprintf("%d", r.Rank), - r.Name, - score, - size, - taskLabel, - hwLabel, + size := r.ParameterSize + if size == "" { + size = "-" + } + entries = append(entries, ModelEntry{ + Rank: r.Rank, + Name: r.Name, + SWEScore: r.SWEScore, + Size: size, + Type: taskLabel, + Hardware: hw, }) } + ux.PrintData(entries) + return + } - t := table.New(). - Border(lipgloss.NormalBorder()). - BorderStyle(ux.TableBorder). - Headers("#", "Model", "Rating", "Size", "Type", "Hardware"). - Rows(rows...) - fmt.Println(t) - fmt.Println(modGray.Render(fmt.Sprintf("%*s", 70, fmt.Sprintf("ollama %s", baseURL)))) + // Text mode: prose header + styled table. 
+ fmt.Println() + fmt.Println(modBold.Render(" Local Models (ranked by SWE-Bench score)")) + fmt.Println() - fmt.Println() - if sweErr != nil { - ux.Info("Scores from built-in estimates (SWE-bench fetch failed).") + headers := []string{"#", "Model", "Rating", "Size", "Type", "Hardware"} + rows := make([][]string, 0, len(ranked)) + for _, r := range ranked { + score := modGray.Render("-") + if r.SWEScore > 0 { + label := fmt.Sprintf("~%.0f%%", r.SWEScore) + if r.ScoreSource != "" { + label += " " + modGray.Render(r.ScoreSource) + } + score = label + } + size := r.ParameterSize + if size == "" { + size = "-" + } + family := ollama.ModelFamily(r.Name) + taskLabel := "General" + if info, ok := hfInfoMap[family]; ok { + taskLabel = ollama.InferTaskLabel(info, r.Name) } else { - ux.Info("Scores from SWE-bench Verified (full-model, not quantized).") + taskLabel = ollama.InferTaskLabel(ollama.HFModelInfo{}, r.Name) } - ux.Info(fmt.Sprintf("Hardware: Q4 estimate vs %.0fGB RAM. --debug for details.", systemRAM)) - fmt.Println() - - snippet := ollama.FormatTOMLSnippet(ranked) - ux.Info(fmt.Sprintf("%d models found. 
Add to ~/.config/devcell/devcell.toml:", len(ranked))) - fmt.Println() - for _, line := range strings.Split(snippet, "\n") { - fmt.Printf(" %s\n", line) + hwLabel := modGray.Render("-") + if systemRAM > 0 { + ok, needed := ollama.CheckHardware(r.ParameterSize, systemRAM) + if needed > 0 { + if ok { + hwLabel = modGreen.Render(fmt.Sprintf("OK (%.0fGB)", needed)) + } else { + hwLabel = modRed.Render(fmt.Sprintf("%.0fGB needed", needed)) + } + } } - fmt.Println() - - return nil - }, + rows = append(rows, []string{ + fmt.Sprintf("%d", r.Rank), + r.Name, + score, + size, + taskLabel, + hwLabel, + }) + } + ux.PrintTable(headers, rows) } func init() { diff --git a/cmd/models_format_test.go b/cmd/models_format_test.go new file mode 100644 index 0000000..f5c7b8b --- /dev/null +++ b/cmd/models_format_test.go @@ -0,0 +1,113 @@ +package main + +// White-box tests for renderModels — package main for access to unexported symbols. + +import ( + "encoding/json" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/ollama" + "github.com/DimmKirr/devcell/internal/ux" +) + +func TestRenderModels_JSONOutputIsValidJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + ranked := []ollama.RankedModel{ + {Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, + {Model: ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, SWEScore: 28.0, Rank: 2, ScoreSource: "est"}, + } + + out := captureStdoutMain(func() { + renderModels(ranked, map[string]ollama.HFModelInfo{}, 32.0) + }) + + var result []map[string]any + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 2 { + t.Fatalf("want 2 entries, got %d", len(result)) + } +} + +func TestRenderModels_JSONContainsNameAndRank(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + ranked := []ollama.RankedModel{ + 
{Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, + } + + out := captureStdoutMain(func() { + renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + }) + + var result []map[string]any + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v", err) + } + if result[0]["name"] != "deepseek-r1:32b" { + t.Errorf("want name=deepseek-r1:32b, got %v", result[0]["name"]) + } + // rank is a number in JSON + rank, ok := result[0]["rank"].(float64) + if !ok || rank != 1 { + t.Errorf("want rank=1, got %v", result[0]["rank"]) + } +} + +func TestRenderModels_JSONSuppressesProseOutput(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + ranked := []ollama.RankedModel{ + {Model: ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, SWEScore: 28.0, Rank: 1}, + } + + out := captureStdoutMain(func() { + renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + }) + + // Prose like "Local Models", TOML snippet markers, "ollama" footer should NOT appear + if strings.Contains(out, "Local Models") { + t.Errorf("json mode should suppress prose header, got: %q", out) + } + if strings.Contains(out, "[ollama]") { + t.Errorf("json mode should suppress TOML snippet, got: %q", out) + } +} + +func TestRenderModels_TextContainsModelName(t *testing.T) { + ux.OutputFormat = "text" + + ranked := []ollama.RankedModel{ + {Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, + } + + out := captureStdoutMain(func() { + renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + }) + + if !strings.Contains(out, "deepseek-r1:32b") { + t.Errorf("text output should contain model name, got: %q", out) + } +} + +func TestRenderModels_TextIncludesProseHeader(t *testing.T) { + ux.OutputFormat = "text" + + ranked := []ollama.RankedModel{ + {Model: ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, Rank: 1}, + 
} + + out := captureStdoutMain(func() { + renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + }) + + if !strings.Contains(out, "Local Models") { + t.Errorf("text mode should include prose header, got: %q", out) + } +} diff --git a/cmd/rdp.go b/cmd/rdp.go index df62330..086d722 100644 --- a/cmd/rdp.go +++ b/cmd/rdp.go @@ -11,8 +11,6 @@ import ( "github.com/DimmKirr/devcell/internal/config" internalrdp "github.com/DimmKirr/devcell/internal/rdp" "github.com/DimmKirr/devcell/internal/ux" - "github.com/charmbracelet/lipgloss" - "github.com/charmbracelet/lipgloss/table" "github.com/spf13/cobra" ) @@ -246,20 +244,26 @@ func rdpList() error { if err != nil { return err } + return renderRDPList(m) +} + +// renderRDPList renders the RDP container map in the current OutputFormat. +// Extracted for testability without a live docker daemon. +func renderRDPList(m map[string]string) error { + headers := []string{"APP_NAME", "PORT", "URL"} if len(m) == 0 { - fmt.Println("No running cell containers with RDP found.") + if ux.OutputFormat != "text" { + ux.PrintTable(headers, nil) + } else { + fmt.Println("No running cell containers with RDP found.") + } return nil } var rows [][]string for app, port := range m { rows = append(rows, []string{app, port, internalrdp.RDPUrl(port)}) } - t := table.New(). - Border(lipgloss.NormalBorder()). - BorderStyle(ux.TableBorder). - Headers("APP_NAME", "PORT", "URL"). - Rows(rows...) - fmt.Println(t) + ux.PrintTable(headers, rows) return nil } diff --git a/cmd/rdp_format_test.go b/cmd/rdp_format_test.go new file mode 100644 index 0000000..f146912 --- /dev/null +++ b/cmd/rdp_format_test.go @@ -0,0 +1,109 @@ +package main + +// White-box tests for renderRDPList — package main for access to unexported symbols. + +import ( + "bytes" + "encoding/json" + "io" + "os" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/ux" +) + +// captureStdoutMain redirects os.Stdout during fn and returns what was written. 
+// (package main equivalent of ux_test's captureStdout) +func captureStdoutMain(fn func()) string { + r, w, _ := os.Pipe() + old := os.Stdout + os.Stdout = w + fn() + w.Close() + os.Stdout = old + var buf bytes.Buffer + io.Copy(&buf, r) + return buf.String() +} + +func TestRenderRDPList_JSONFormat(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + m := map[string]string{"devcell-42-run": "3456"} + + out := captureStdoutMain(func() { renderRDPList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if result[0]["app_name"] != "devcell-42-run" { + t.Errorf("want app_name=devcell-42-run, got %q", result[0]["app_name"]) + } + if result[0]["port"] != "3456" { + t.Errorf("want port=3456, got %q", result[0]["port"]) + } +} + +func TestRenderRDPList_EmptyMapJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + out := captureStdoutMain(func() { renderRDPList(map[string]string{}) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("empty list should produce valid JSON array: %v\noutput: %q", err, out) + } + if len(result) != 0 { + t.Errorf("want empty array, got %d entries", len(result)) + } +} + +func TestRenderRDPList_EmptyMapText(t *testing.T) { + ux.OutputFormat = "text" + + out := captureStdoutMain(func() { renderRDPList(map[string]string{}) }) + + if !strings.Contains(out, "No running") { + t.Errorf("text empty message should contain 'No running', got: %q", out) + } +} + +func TestRenderRDPList_TextContainsAppNameAndPort(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{"cell-abc-run": "3389"} + + out := captureStdoutMain(func() { renderRDPList(m) }) + + if !strings.Contains(out, "cell-abc-run") { + t.Errorf("text 
output should contain app name, got: %q", out) + } + if !strings.Contains(out, "3389") { + t.Errorf("text output should contain port, got: %q", out) + } +} + +func TestRenderRDPList_URLIncludedInJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + m := map[string]string{"cell-1-run": "3389"} + + out := captureStdoutMain(func() { renderRDPList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v", err) + } + url := result[0]["url"] + if !strings.Contains(url, "3389") { + t.Errorf("url should contain port 3389, got %q", url) + } +} diff --git a/cmd/root.go b/cmd/root.go index 99c9e15..715aeb7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -64,6 +64,7 @@ func init() { rootCmd.PersistentFlags().Bool("dry-run", false, "print docker run argv and exit without running") rootCmd.PersistentFlags().Bool("plain-text", false, "disable spinners, use plain log output (for CI/non-TTY)") rootCmd.PersistentFlags().Bool("debug", false, "plain-text mode plus stream full build log to stdout") + rootCmd.PersistentFlags().String("format", "text", "output format: text, yaml, or json") rootCmd.PersistentFlags().String("engine", "docker", "execution engine: docker or vagrant") rootCmd.PersistentFlags().Bool("macos", false, "use macOS VM via Vagrant (alias for --engine=vagrant)") rootCmd.PersistentFlags().String("vagrant-provider", "utm", "Vagrant provider (e.g. utm)") @@ -103,6 +104,9 @@ func applyOutputFlags() { ux.Verbose = true } } + if f := scanStringFlag("--format"); f != "" { + ux.OutputFormat = f + } } // cellBoolFlags are boolean flags consumed by devcell: strip the flag token only. 
@@ -123,6 +127,7 @@ var cellStringFlags = map[string]bool{ "--vagrant-box": true, "--base-image": true, "--session-name": true, + "--format": true, } // stripCellFlags removes devcell-specific flags (and their values) from args diff --git a/cmd/strip_test.go b/cmd/strip_test.go index 76fc67f..cee4045 100644 --- a/cmd/strip_test.go +++ b/cmd/strip_test.go @@ -5,6 +5,8 @@ package main import ( "reflect" "testing" + + "github.com/DimmKirr/devcell/internal/ux" ) func TestStripCellFlags_BoolFlagStripped(t *testing.T) { @@ -109,6 +111,59 @@ func TestStripCellFlags_EmptyInput(t *testing.T) { } } +func TestStripCellFlags_FormatSpaceFormStripped(t *testing.T) { + got := stripCellFlags([]string{"--format", "json", "claude"}) + want := []string{"claude"} + if !reflect.DeepEqual(got, want) { + t.Errorf("--format space form should be stripped: want %v, got %v", want, got) + } +} + +func TestStripCellFlags_FormatEqualsFormStripped(t *testing.T) { + got := stripCellFlags([]string{"--format=yaml", "claude"}) + want := []string{"claude"} + if !reflect.DeepEqual(got, want) { + t.Errorf("--format=value should be stripped: want %v, got %v", want, got) + } +} + +func TestApplyOutputFlags_FormatJSON(t *testing.T) { + old := osArgs + osArgs = []string{"cell", "rdp", "--list", "--format", "json"} + defer func() { osArgs = old; ux.OutputFormat = "text" }() + + applyOutputFlags() + + if ux.OutputFormat != "json" { + t.Errorf("want OutputFormat=json, got %q", ux.OutputFormat) + } +} + +func TestApplyOutputFlags_FormatEqualsForm(t *testing.T) { + old := osArgs + osArgs = []string{"cell", "--format=yaml", "rdp", "--list"} + defer func() { osArgs = old; ux.OutputFormat = "text" }() + + applyOutputFlags() + + if ux.OutputFormat != "yaml" { + t.Errorf("want OutputFormat=yaml, got %q", ux.OutputFormat) + } +} + +func TestApplyOutputFlags_NoFormatLeavesDefault(t *testing.T) { + old := osArgs + osArgs = []string{"cell", "rdp", "--list"} + defer func() { osArgs = old; ux.OutputFormat = "text" }() + + 
ux.OutputFormat = "text" + applyOutputFlags() + + if ux.OutputFormat != "text" { + t.Errorf("want OutputFormat=text (unchanged), got %q", ux.OutputFormat) + } +} + func TestScanStringFlag_SpaceForm(t *testing.T) { old := osArgs osArgs = []string{"cell", "--engine", "vagrant", "claude"} diff --git a/cmd/vnc.go b/cmd/vnc.go index 5a35a5c..c4f2b1c 100644 --- a/cmd/vnc.go +++ b/cmd/vnc.go @@ -12,8 +12,6 @@ import ( internalrdp "github.com/DimmKirr/devcell/internal/rdp" "github.com/DimmKirr/devcell/internal/ux" internalvnc "github.com/DimmKirr/devcell/internal/vnc" - "github.com/charmbracelet/lipgloss" - "github.com/charmbracelet/lipgloss/table" "github.com/spf13/cobra" ) @@ -225,20 +223,26 @@ func vncList() error { if err != nil { return err } + return renderVNCList(m) +} + +// renderVNCList renders the VNC container map in the current OutputFormat. +// Extracted for testability without a live docker daemon. +func renderVNCList(m map[string]string) error { + headers := []string{"APP_NAME", "PORT", "URL"} if len(m) == 0 { - fmt.Println("No running cell containers found.") + if ux.OutputFormat != "text" { + ux.PrintTable(headers, nil) + } else { + fmt.Println("No running cell containers found.") + } return nil } var rows [][]string for app, port := range m { rows = append(rows, []string{app, port, internalvnc.VNCUrl(port)}) } - t := table.New(). - Border(lipgloss.NormalBorder()). - BorderStyle(ux.TableBorder). - Headers("APP_NAME", "PORT", "URL"). - Rows(rows...) - fmt.Println(t) + ux.PrintTable(headers, rows) return nil } diff --git a/cmd/vnc_format_test.go b/cmd/vnc_format_test.go new file mode 100644 index 0000000..142dc16 --- /dev/null +++ b/cmd/vnc_format_test.go @@ -0,0 +1,89 @@ +package main + +// White-box tests for renderVNCList — package main for access to unexported symbols. 
+ +import ( + "encoding/json" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/ux" +) + +func TestRenderVNCList_JSONFormat(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + m := map[string]string{"devcell-7-run": "5922"} + + out := captureStdoutMain(func() { renderVNCList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if result[0]["app_name"] != "devcell-7-run" { + t.Errorf("want app_name=devcell-7-run, got %q", result[0]["app_name"]) + } + if result[0]["port"] != "5922" { + t.Errorf("want port=5922, got %q", result[0]["port"]) + } +} + +func TestRenderVNCList_EmptyMapJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + out := captureStdoutMain(func() { renderVNCList(map[string]string{}) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("empty list should produce valid JSON array: %v\noutput: %q", err, out) + } + if len(result) != 0 { + t.Errorf("want empty array, got %d entries", len(result)) + } +} + +func TestRenderVNCList_EmptyMapText(t *testing.T) { + ux.OutputFormat = "text" + + out := captureStdoutMain(func() { renderVNCList(map[string]string{}) }) + + if !strings.Contains(out, "No running") { + t.Errorf("text empty message should contain 'No running', got: %q", out) + } +} + +func TestRenderVNCList_TextContainsAppNameAndPort(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{"cell-vnc-run": "5900"} + + out := captureStdoutMain(func() { renderVNCList(m) }) + + if !strings.Contains(out, "cell-vnc-run") { + t.Errorf("text output should contain app name, got: %q", out) + } +} + +func TestRenderVNCList_URLIncludedInJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { 
ux.OutputFormat = "text" }() + + m := map[string]string{"cell-1-run": "5900"} + + out := captureStdoutMain(func() { renderVNCList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v", err) + } + url := result[0]["url"] + if !strings.Contains(url, "5900") { + t.Errorf("url should contain port 5900, got %q", url) + } +} diff --git a/internal/ux/format.go b/internal/ux/format.go new file mode 100644 index 0000000..09dcf6e --- /dev/null +++ b/internal/ux/format.go @@ -0,0 +1,79 @@ +package ux + +import ( + "encoding/json" + "fmt" + "os" + "strings" + + "github.com/charmbracelet/lipgloss" + "github.com/charmbracelet/lipgloss/table" + "gopkg.in/yaml.v3" +) + +// OutputFormat controls how PrintTable and PrintData emit output. +// Values: "text" (default lipgloss table), "json", "yaml". +var OutputFormat = "text" + +// PrintTable renders headers+rows in the current OutputFormat. +// text: lipgloss bordered table. json/yaml: array of objects keyed by header. +func PrintTable(headers []string, rows [][]string) { + switch OutputFormat { + case "json": + data := rowsToMaps(headers, rows) + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + enc.Encode(data) //nolint:errcheck + case "yaml": + data := rowsToMaps(headers, rows) + enc := yaml.NewEncoder(os.Stdout) + enc.SetIndent(2) + enc.Encode(data) //nolint:errcheck + enc.Close() //nolint:errcheck + default: + t := table.New(). + Border(lipgloss.NormalBorder()). + BorderStyle(TableBorder). + Headers(headers...). + Rows(rows...) + fmt.Println(t) + } +} + +// PrintData serialises any Go value in the current OutputFormat. +// Use this when commands build typed structs (e.g. for models output). +// In text mode it falls back to JSON so the caller always gets parseable output. 
+func PrintData(v any) { + switch OutputFormat { + case "yaml": + enc := yaml.NewEncoder(os.Stdout) + enc.SetIndent(2) + enc.Encode(v) //nolint:errcheck + enc.Close() //nolint:errcheck + default: // "json" and "text" fallback + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + enc.Encode(v) //nolint:errcheck + } +} + +// headerKey converts a table header string to a JSON/YAML object key. +// "APP_NAME" → "app_name", "RDP Port" → "rdp_port" +func headerKey(h string) string { + return strings.ToLower(strings.ReplaceAll(h, " ", "_")) +} + +// rowsToMaps converts parallel header+row slices to a slice of string maps. +func rowsToMaps(headers []string, rows [][]string) []map[string]string { + result := make([]map[string]string, 0, len(rows)) + for _, row := range rows { + m := make(map[string]string, len(headers)) + for i, h := range headers { + if i < len(row) { + m[headerKey(h)] = row[i] + } + } + result = append(result, m) + } + return result +} diff --git a/internal/ux/format_test.go b/internal/ux/format_test.go new file mode 100644 index 0000000..2afb90f --- /dev/null +++ b/internal/ux/format_test.go @@ -0,0 +1,210 @@ +package ux_test + +import ( + "bytes" + "encoding/json" + "io" + "os" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/ux" + "gopkg.in/yaml.v3" +) + +// captureStdout redirects os.Stdout during fn and returns what was written. 
+func captureStdout(fn func()) string { + r, w, _ := os.Pipe() + old := os.Stdout + os.Stdout = w + fn() + w.Close() + os.Stdout = old + var buf bytes.Buffer + io.Copy(&buf, r) + return buf.String() +} + +func TestOutputFormatDefaultIsText(t *testing.T) { + ux.OutputFormat = "text" + if ux.OutputFormat != "text" { + t.Errorf("default OutputFormat should be text, got %q", ux.OutputFormat) + } +} + +func TestPrintTable_JSONOutputIsValidJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + headers := []string{"APP_NAME", "PORT", "URL"} + rows := [][]string{ + {"devcell-123-run", "3389", "rdp://127.0.0.1:3389"}, + } + + out := captureStdout(func() { ux.PrintTable(headers, rows) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("output is not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if got := result[0]["app_name"]; got != "devcell-123-run" { + t.Errorf("want app_name=devcell-123-run, got %q", got) + } + if got := result[0]["port"]; got != "3389" { + t.Errorf("want port=3389, got %q", got) + } + if got := result[0]["url"]; got != "rdp://127.0.0.1:3389" { + t.Errorf("want url=rdp://127.0.0.1:3389, got %q", got) + } +} + +func TestPrintTable_YAMLOutputIsValidYAML(t *testing.T) { + ux.OutputFormat = "yaml" + defer func() { ux.OutputFormat = "text" }() + + headers := []string{"APP_NAME", "PORT"} + rows := [][]string{{"devcell-42-run", "5900"}} + + out := captureStdout(func() { ux.PrintTable(headers, rows) }) + + var result []map[string]string + if err := yaml.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("output is not valid YAML: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if got := result[0]["app_name"]; got != "devcell-42-run" { + t.Errorf("want app_name=devcell-42-run, got %q", got) + } + if got := 
result[0]["port"]; got != "5900" { + t.Errorf("want port=5900, got %q", got) + } +} + +func TestPrintTable_TextContainsHeadersAndData(t *testing.T) { + ux.OutputFormat = "text" + + headers := []string{"NAME", "PORT"} + rows := [][]string{{"myapp", "8080"}} + + out := captureStdout(func() { ux.PrintTable(headers, rows) }) + + if !strings.Contains(out, "myapp") { + t.Errorf("text output should contain row data, got: %q", out) + } + if !strings.Contains(out, "NAME") { + t.Errorf("text output should contain header NAME, got: %q", out) + } +} + +func TestPrintTable_JSONEmptyRowsIsEmptyArray(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + out := captureStdout(func() { + ux.PrintTable([]string{"APP_NAME", "PORT"}, [][]string{}) + }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("empty rows should produce valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 0 { + t.Errorf("want empty array, got %d entries", len(result)) + } +} + +func TestPrintTable_HeaderKeyConversion(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + // APP_NAME → app_name, "RDP Port" → rdp_port + headers := []string{"APP_NAME", "RDP Port"} + rows := [][]string{{"cell-1", "3389"}} + + out := captureStdout(func() { ux.PrintTable(headers, rows) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v", err) + } + if _, ok := result[0]["app_name"]; !ok { + t.Errorf("APP_NAME should map to app_name, got keys: %v", mapKeys(result[0])) + } + if _, ok := result[0]["rdp_port"]; !ok { + t.Errorf("RDP Port should map to rdp_port, got keys: %v", mapKeys(result[0])) + } +} + +func TestPrintTable_MultipleRows_JSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + headers := []string{"NAME", "PORT"} + rows := [][]string{{"a", "1"}, {"b", "2"}, 
{"c", "3"}} + + out := captureStdout(func() { ux.PrintTable(headers, rows) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v", err) + } + if len(result) != 3 { + t.Errorf("want 3 entries, got %d", len(result)) + } +} + +func TestPrintData_JSONOutputIsValidJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + data := []struct { + Name string `json:"name"` + Port string `json:"port"` + }{{"cell-1", "3389"}, {"cell-2", "3390"}} + + out := captureStdout(func() { ux.PrintData(data) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("PrintData json not valid: %v\noutput: %q", err, out) + } + if len(result) != 2 { + t.Fatalf("want 2 entries, got %d", len(result)) + } + if result[0]["name"] != "cell-1" { + t.Errorf("want name=cell-1, got %q", result[0]["name"]) + } +} + +func TestPrintData_YAMLOutputIsValidYAML(t *testing.T) { + ux.OutputFormat = "yaml" + defer func() { ux.OutputFormat = "text" }() + + data := []struct { + Name string `yaml:"name"` + Port string `yaml:"port"` + }{{"myapp", "9000"}} + + out := captureStdout(func() { ux.PrintData(data) }) + + var result []map[string]string + if err := yaml.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("PrintData yaml not valid: %v\noutput: %q", err, out) + } + if result[0]["name"] != "myapp" { + t.Errorf("want name=myapp, got %q", result[0]["name"]) + } +} + +func mapKeys(m map[string]string) []string { + ks := make([]string, 0, len(m)) + for k := range m { + ks = append(ks, k) + } + return ks +} diff --git a/nixhome/modules/fragments/30-claude.sh b/nixhome/modules/fragments/30-claude.sh index 3a46ec0..671fd5c 100755 --- a/nixhome/modules/fragments/30-claude.sh +++ b/nixhome/modules/fragments/30-claude.sh @@ -149,4 +149,19 @@ merge_claude_nix # Merge nix MCP servers into user config merge_claude_mcp "$HOME/.claude.json" + +# 
Linear MCP: inject Bearer token auth when LINEAR_API_KEY is set, +# overriding the OAuth plugin entry. Falls back to plugin OAuth when unset. +if [ -n "${LINEAR_API_KEY:-}" ] && [ -f "$HOME/.claude.json" ]; then + _tmp=$(mktemp) + jq --arg key "$LINEAR_API_KEY" \ + '.mcpServers.linear = {type:"http", url:"https://mcp.linear.app/mcp", headers:{Authorization:("Bearer "+$key)}}' \ + "$HOME/.claude.json" > "$_tmp" 2>/dev/null \ + && mv "$_tmp" "$HOME/.claude.json" \ + && log "✓ Linear MCP: Bearer token auth (LINEAR_API_KEY set)" \ + || { rm -f "$_tmp"; log "⚠ Linear MCP: failed to inject Bearer token"; } +else + log "Linear MCP: no LINEAR_API_KEY — using OAuth plugin" +fi + [ -f "$HOME/.claude.json" ] && chown "$HOST_USER" "$HOME/.claude.json" diff --git a/nixhome/modules/fragments/40-postgres.sh b/nixhome/modules/fragments/40-postgres.sh index d5bb179..ad1587b 100644 --- a/nixhome/modules/fragments/40-postgres.sh +++ b/nixhome/modules/fragments/40-postgres.sh @@ -33,8 +33,14 @@ fi chown -R "$HOST_USER" "$PGDATA" # Start PostgreSQL as session user (TCP on localhost:5432 + Unix socket in /tmp) -gosu "$HOST_USER" "$NIX_BIN/pg_ctl" -D "$PGDATA" -l "$PGDATA/postgresql.log" \ - -o "-p $PGPORT -k /tmp" start +# pg_ctl prints "waiting for server to start..." to stdout — suppress unless debug mode. 
+if [ "${DEVCELL_DEBUG:-false}" = "true" ]; then + gosu "$HOST_USER" "$NIX_BIN/pg_ctl" -D "$PGDATA" -l "$PGDATA/postgresql.log" \ + -o "-p $PGPORT -k /tmp" start +else + gosu "$HOST_USER" "$NIX_BIN/pg_ctl" -D "$PGDATA" -l "$PGDATA/postgresql.log" \ + -o "-p $PGPORT -k /tmp" start >/dev/null 2>&1 +fi # Readiness gate — block until accepting connections (up to 15s) for _ in $(seq 1 30); do diff --git a/nixhome/modules/scraping/default.nix b/nixhome/modules/scraping/default.nix index 5c26af8..9e23384 100644 --- a/nixhome/modules/scraping/default.nix +++ b/nixhome/modules/scraping/default.nix @@ -793,6 +793,7 @@ async function __hmMove(page, tx, ty) {\ exec ${pkgs.chromium}/bin/chromium \ --user-data-dir="''${CHROMIUM_PROFILE_PATH:-$HOME/.chrome-''${APP_NAME:-default}}" \ --no-sandbox \ + --disable-infobars \ --disable-gpu \ --disable-dev-shm-usage \ "$@" From d899b1d1b003f8deab76daffdcbe34782aa1a0dd Mon Sep 17 00:00:00 2001 From: Dmitry Kireev Date: Tue, 21 Apr 2026 05:48:38 +0000 Subject: [PATCH 3/4] Vagrant engine live; cell models shows cloud + local; cell login redesigned for bot-detection avoidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - feat(vagrant_runner): implement vagrant engine for cell claude/build/rdp/vnc — AI tools now run inside Vagrant VMs via ssh; was previously a "not yet implemented" stub - feat(runner/vagrant): add all vagrant primitives (VagrantEnsureUp, VagrantProvision, VagrantUploadNixhome, VagrantBinaryExists, VagrantEnsureGUI, VagrantRunningCells, VagrantReadForwardedPort, BuildVagrantSSHArgv) — required for vagrant engine - feat(scaffold): add ScaffoldLinuxVagrantfile + ScaffoldVagrantLinuxStack and Vagrantfile.linux.tmpl — scaffolds Debian ARM64 + Nix VMs for UTM/libvirt providers - feat(nixhome): add hosts/linux/home.nix + hosts/linux/stack.nix for vagrant-linux home-manager target — enables nix provisioning inside vagrant VMs - feat(models): add OpenRouter cloud model integration — cell 
models now shows Anthropic/OpenAI/Google models without requiring a local ollama instance - feat(cloudmodels): new cloudmodels package with OpenRouter API client and latest-gen/trusted-provider filters — no user-facing impact - feat(cache): new on-disk TTL cache (XDG_CACHE_HOME/devcell/) for SWE-bench scores and OpenRouter listings — cell models is faster on repeat invocations - feat(models): add --source=local|cloud|all flag and interactive sortable table — users can filter by source and sort by rating, speed, or size - feat(claude): auto-select best ollama model as ANTHROPIC_MODEL when use_ollama is enabled and no model is pinned in config — cell claude no longer silently uses wrong model - feat(chrome): redesign login to two-phase flow (clean browser → login → close → headless CDP extraction) — avoids bot detection that triggered on CDP-enabled sessions - feat(chrome): add --force flag to wipe saved browser profile for a fresh login — users no longer need to manually delete profile directories - feat(chrome): extract localStorage alongside cookies into storage-state.json — sites using localStorage auth tokens work correctly with Playwright - feat(chrome): save full fingerprint (UA, platform, brands) from real Chrome binary instead of hardcoded defaults — Patchright now uses matching identity - feat(rdp): add vagrant VM discovery to cell rdp/vnc list and auto-connect — vagrant cells appear alongside docker cells in picker - feat(cfg): add engine, vagrant_provider, vagrant_box, docker_privileged config fields — users can set engine=vagrant and docker_privileged=true in .devcell.toml - feat(runner): add --privileged docker run flag when docker_privileged=true in config — required for /dev/kvm and Android emulation - feat(nixhome): add android.nix module with Android SDK, adb, apktool, jadx — Android development and reverse engineering support - chore(nixhome): add platformio to electronics.nix — embedded development support - fix(runner): always use 
--progress=plain for docker build to capture full nix error output — build errors now show actionable hints instead of silent failure - feat(swebench): add NormalizeCloudID to map OpenRouter model IDs to SWE-bench keys — cloud models get real SWE scores where available - feat(scraping): inject window.__cellFp platform spoof in Patchright from cell login fingerprint — navigator.platform/userAgentData matches host Chrome - test(vagrant): add unit tests for VagrantReadForwardedPort, ParseVagrantGlobalStatus, ParseVagrantPortOutput, VagrantBinaryExists - test(claude): add tests for ANTHROPIC_MODEL injection from config (prefix stripping, flag+config, no ollama fallback) - test(rdp): add tests for vagrant-named entries and mixed docker+vagrant listings - test(cache): add tests for TTL expiry, missing file, corrupt file, and XDG_CACHE_HOME resolution --- .devcell.toml | 8 +- cmd/apparg.go | 28 +- cmd/build.go | 42 ++ cmd/chrome.go | 509 +++++++++++------- cmd/claude.go | 117 +++- cmd/claude_test.go | 113 ++++ cmd/initflow.go | 3 +- cmd/models.go | 399 +++++++++----- cmd/models_format_test.go | 26 +- cmd/models_test.go | 11 +- cmd/opencode.go | 2 +- cmd/rdp.go | 183 ++++--- cmd/rdp_format_test.go | 58 ++ cmd/root.go | 42 +- cmd/vagrant_runner.go | 328 +++++++++++ cmd/vagrant_test.go | 89 ++- cmd/vnc.go | 194 ++++--- cmd/vnc_format_test.go | 58 ++ internal/cache/cache.go | 75 +++ internal/cache/cache_test.go | 65 +++ internal/cfg/cfg.go | 23 +- internal/cloudmodels/openrouter.go | 184 +++++++ internal/cloudmodels/openrouter_test.go | 131 +++++ internal/cloudmodels/pipeline_test.go | 223 ++++++++ internal/ollama/hardware.go | 53 ++ internal/ollama/hardware_darwin.go | 11 + internal/ollama/hardware_linux.go | 3 + internal/ollama/hardware_test.go | 26 + internal/ollama/ollama.go | 134 ++++- internal/ollama/ollama_test.go | 74 ++- internal/ollama/ratings.go | 122 +++++ internal/ollama/ratings_test.go | 100 ++++ internal/ollama/swebench.go | 28 + 
internal/ollama/swebench_test.go | 25 + internal/runner/runner.go | 27 +- internal/runner/runner_test.go | 12 +- internal/runner/vagrant.go | 430 +++++++++++++++ internal/runner/vagrant_test.go | 491 +++++++++++++++++ internal/scaffold/scaffold.go | 98 ++++ internal/scaffold/scaffold_test.go | 8 +- .../scaffold/templates/Vagrantfile.linux.tmpl | 287 ++++++++++ internal/scaffold/vagrant_linux_test.go | 165 ++++++ internal/ux/build_errors.go | 146 +++++ internal/ux/build_errors_test.go | 103 ++++ internal/ux/table.go | 197 +++++++ nixhome/flake.nix | 51 +- nixhome/hosts/linux/home.nix | 16 + nixhome/hosts/linux/stack.nix | 6 + nixhome/modules/android.nix | 50 ++ nixhome/modules/electronics.nix | 1 + nixhome/modules/scraping/default.nix | 358 +++++++++++- nixhome/modules/security.nix | 4 + nixhome/stacks/ultimate.nix | 1 + test/testdata/openrouter_models.json | 69 +++ test/testdata/swebench_leaderboards.json | 69 +++ 55 files changed, 5454 insertions(+), 622 deletions(-) create mode 100644 cmd/vagrant_runner.go create mode 100644 internal/cache/cache.go create mode 100644 internal/cache/cache_test.go create mode 100644 internal/cloudmodels/openrouter.go create mode 100644 internal/cloudmodels/openrouter_test.go create mode 100644 internal/cloudmodels/pipeline_test.go create mode 100644 internal/ollama/ratings_test.go create mode 100644 internal/runner/vagrant.go create mode 100644 internal/runner/vagrant_test.go create mode 100644 internal/scaffold/templates/Vagrantfile.linux.tmpl create mode 100644 internal/scaffold/vagrant_linux_test.go create mode 100644 internal/ux/build_errors.go create mode 100644 internal/ux/build_errors_test.go create mode 100644 internal/ux/table.go create mode 100644 nixhome/hosts/linux/home.nix create mode 100644 nixhome/hosts/linux/stack.nix create mode 100644 nixhome/modules/android.nix create mode 100644 test/testdata/openrouter_models.json create mode 100644 test/testdata/swebench_leaderboards.json diff --git a/.devcell.toml 
b/.devcell.toml index c51f148..fd3128b 100644 --- a/.devcell.toml +++ b/.devcell.toml @@ -5,12 +5,14 @@ # Base stack (one of: base, go, node, python, fullstack, electronics, ultimate) stack = "ultimate" # -# Addon modules (from nixhome/modules/): desktop, electronics, financial, +# Addon modules (from nixhome/modules/): android, desktop, electronics, financial, # graphics, infra, news, nixos, qa-tools, scraping, travel, go, node, python # modules = ["electronics", "desktop"] # # Disable GUI (Xvfb + VNC + browser). GUI is enabled by default. # gui = false +# Run container with Docker --privileged flag. Default: false. +# docker_privileged = true # Timezone (IANA format). If omitted, inherits host $TZ. # timezone = "Europe/Prague" @@ -38,8 +40,8 @@ stack = "ultimate" # 1Password documents whose fields are passed into the container as env vars. # Requires `op` CLI on the host. Each field in the document becomes an env var: # e.g. a field labeled "API_KEY" with value "sk-123" → env var API_KEY=sk-123. -# [op] -# documents = ["prod-api-keys", "dev-secrets"] +[op] +documents = ["prod-devcell-common"] # AWS credential scoping. When true, credentials are scoped to read-only # via IAM session policy. All AWS tools (cli, terraform, SDKs, MCP servers) diff --git a/cmd/apparg.go b/cmd/apparg.go index ca8652d..038141a 100644 --- a/cmd/apparg.go +++ b/cmd/apparg.go @@ -2,6 +2,7 @@ package main import ( "bytes" + "fmt" "os/exec" "sort" "strings" @@ -57,14 +58,29 @@ func parseContainerNames(output string) []string { } // selectCell shows an interactive picker when multiple cells are running. -// Returns the selected AppName. +// Labels show " docker" or " vagrant". Returns the selected key. 
func selectCell(apps map[string]string) (string, error) { - var names []string - for name := range apps { - names = append(names, name) + var keys []string + for key := range apps { + keys = append(keys, key) + } + sort.Strings(keys) + opts := make([]ux.SelectOption, len(keys)) + for i, key := range keys { + var displayName, cellType string + if strings.HasPrefix(key, "vagrant-") { + displayName = strings.TrimPrefix(key, "vagrant-") + cellType = "vagrant" + } else { + displayName = key + cellType = "docker" + } + opts[i] = ux.SelectOption{ + Label: fmt.Sprintf("%-28s %s", displayName, cellType), + Value: key, + } } - sort.Strings(names) - return ux.GetSelection("Multiple cells found — select one", names) + return ux.GetSelectionKV("Multiple cells found — select one", opts) } // completeRunningApps provides shell completion for running cell container names. diff --git a/cmd/build.go b/cmd/build.go index 19db054..6c41162 100644 --- a/cmd/build.go +++ b/cmd/build.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "os" "github.com/DimmKirr/devcell/internal/cfg" "github.com/DimmKirr/devcell/internal/config" @@ -31,6 +32,47 @@ func runBuild(cmd *cobra.Command, _ []string) error { return fmt.Errorf("load config: %w", err) } + // ── Vagrant engine ──────────────────────────────────────────────────────── + // cell build --engine=vagrant → vagrant provision (re-applies nixhome flake) + // cell build --update --engine=vagrant → nix flake update inside VM, then provision + engine := scanStringFlag("--engine") + if scanFlag("--macos") { + engine = "vagrant" + } + if engine == "vagrant" { + cellCfgVagrant := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) + vagrantBox := scanStringFlag("--vagrant-box") + if vagrantBox == "" { + vagrantBox = "utm/bookworm" + } + vagrantProvider := scanStringFlag("--vagrant-provider") + if vagrantProvider == "" { + vagrantProvider = "utm" + } + // Scaffold Vagrantfile idempotently (same as runVagrantAgent step 1). 
+ nixhomeDir := resolveVagrantNixhome(c.BaseDir) + if nixhomeDir == "" { + nixhomeDir = c.BaseDir + "/nixhome" + } + vmConfigDir := os.Getenv("DEVCELL_CONFIG_DIR") + if vmConfigDir == "" { + vmConfigDir = c.HostHome + "/.config/devcell" + } + // Always regenerate Vagrantfile on build (ports, stack may have changed). + os.Remove(c.BuildDir + "/Vagrantfile") + if err := scaffold.ScaffoldLinuxVagrantfile( + c.BuildDir, vagrantBox, vagrantProvider, + cellCfgVagrant.Cell.ResolvedStack(), + c.BaseDir, nixhomeDir, + c.VNCPort, c.RDPPort, + c.HostHome, vmConfigDir, + ); err != nil { + fmt.Fprintf(os.Stderr, "warning: vagrantfile scaffold failed: %v\n", err) + } + return runVagrantBuild(c.BuildDir, c.BaseDir, cellCfgVagrant, update, scanFlag("--dry-run")) + } + + // ── Docker engine (default) ─────────────────────────────────────────────── if err := config.EnsureBuildDir(c.BuildDir); err != nil { return fmt.Errorf("ensure build dir: %w", err) } diff --git a/cmd/chrome.go b/cmd/chrome.go index e5e6d47..88c6275 100644 --- a/cmd/chrome.go +++ b/cmd/chrome.go @@ -4,8 +4,6 @@ import ( "bufio" "encoding/json" "fmt" - "io" - "net/http" "os" "os/exec" "path/filepath" @@ -20,12 +18,11 @@ import ( ) var ( - chromeSyncOnly bool - chromeNoSync bool + chromeSyncOnly bool + chromeNoSync bool + chromeForce bool ) -const chromeDebugPort = "19222" - var chromeCmd = &cobra.Command{ Use: "chrome [app-name] [-- urls...]", Short: "Open Chromium with a project-scoped profile and sync cookies to Playwright", @@ -69,6 +66,8 @@ Examples: func init() { chromeCmd.Flags().BoolVar(&chromeSyncOnly, "sync", false, "sync cookies only (don't open browser)") chromeCmd.Flags().BoolVar(&chromeNoSync, "no-sync", false, "open browser without syncing cookies on close") + chromeCmd.Flags().BoolVar(&chromeForce, "force", false, "wipe saved browser profile and force a fresh login") + loginCmd.Flags().BoolVar(&chromeForce, "force", false, "wipe saved browser profile and force a fresh login") } // chromeBinary 
returns the path to the best available Chromium/Chrome binary. @@ -113,10 +112,18 @@ func runChrome(cmd *cobra.Command, args []string) error { ux.Debugf("storage-state: %s", storageStatePath) if chromeSyncOnly { - // --sync without browser: re-extract from a running Chrome or error. return fmt.Errorf("--sync requires a running browser; use 'cell chrome' or 'cell login' instead") } + if chromeForce { + if _, err := os.Stat(chromeProfile); err == nil { + ux.Info("Wiping saved browser profile for fresh login...") + if err := os.RemoveAll(chromeProfile); err != nil { + return fmt.Errorf("wipe profile: %w", err) + } + } + } + if !chromeSyncOnly { if err := openExtractAndClose(chromeProfile, storageStatePath, urls, chromeNoSync); err != nil { return err @@ -144,14 +151,25 @@ type storageStateCookie struct { SameSite string `json:"sameSite"` } +type storageStateOrigin struct { + Origin string `json:"origin"` + LocalStorage []localStorageEntry `json:"localStorage"` +} + +type localStorageEntry struct { + Name string `json:"name"` + Value string `json:"value"` +} + type storageState struct { - Cookies []storageStateCookie `json:"cookies"` - Origins []struct{} `json:"origins"` + Cookies []storageStateCookie `json:"cookies"` + Origins []storageStateOrigin `json:"origins"` } -// openExtractAndClose launches Chromium with CDP, waits for user to press -// Enter, extracts cookies via DevTools Protocol (decrypted values), writes -// storage-state.json, then closes Chrome. +// openExtractAndClose opens Chrome for the user to log in (no CDP, no special +// flags — clean session that won't trigger bot detection), waits for Enter, +// closes the login browser, then launches a headless CDP-only instance against +// the same profile to extract cookies via Network.getAllCookies, and closes it. 
func openExtractAndClose(profile, storageStatePath string, urls []string, noSync bool) error { bin, err := chromeBinary() if err != nil { @@ -159,38 +177,35 @@ func openExtractAndClose(profile, storageStatePath string, urls []string, noSync } ux.Debugf("browser: %s", bin) - // Read Playwright's fingerprint to spoof host Chrome, so session-bound - // sites (BA, banks) bind cookies to Playwright's fingerprint, not the host's. - playwrightUA := readPlaywrightFingerprint(filepath.Dir(filepath.Dir(profile))) - if playwrightUA == "" { - // Bootstrap: query a running container for the UA, or use known default. - playwrightUA = getPlaywrightUA(storageStatePath) + // Save Chrome's real fingerprint for Patchright so both use the same identity. + if readPlaywrightFingerprint(filepath.Dir(filepath.Dir(profile))) == nil { + ensureFingerprint(bin, storageStatePath) } - argv := []string{ + // Phase 1: login browser — no CDP, no special flags. + loginArgv := []string{ "--user-data-dir=" + profile, - "--remote-debugging-port=" + chromeDebugPort, - } - if playwrightUA != "" { - argv = append(argv, "--user-agent="+playwrightUA) - ux.Debugf("spoofing UA: %s", playwrightUA) + "--no-first-run", + "--no-default-browser-check", } - argv = append(argv, urls...) + loginArgv = append(loginArgv, urls...) browserName := filepath.Base(filepath.Dir(filepath.Dir(filepath.Dir(bin)))) if browserName == "" || browserName == "." { browserName = filepath.Base(bin) } ux.Info(fmt.Sprintf("Opening %s", browserName)) + ux.Debugf("binary: %s", bin) + ux.Debugf("args: %s", strings.Join(loginArgv, " ")) ux.Debugf("profile: %s", profile) - proc := exec.Command(bin, argv...) + proc := exec.Command(bin, loginArgv...) 
proc.Stdout = os.Stdout if ux.Verbose { proc.Stderr = os.Stderr } if err := proc.Start(); err != nil { - return fmt.Errorf("start chromium: %w", err) + return fmt.Errorf("start browser: %w", err) } ux.Debugf("PID: %d", proc.Process.Pid) @@ -209,22 +224,6 @@ func openExtractAndClose(profile, storageStatePath string, urls []string, noSync select { case <-enterCh: fmt.Println() - - if !noSync { - // Extract cookies via CDP before closing Chrome. - sp := ux.NewProgressSpinner("Extracting cookies via DevTools") - - // Navigate to about:blank first so no site JS is running. - cdpNavigateBlank() - - count, sites, err := extractCookiesViaCDP(storageStatePath) - if err != nil { - sp.Fail(fmt.Sprintf("cookie extraction failed: %v", err)) - } else { - sp.Success(fmt.Sprintf("Exported %d cookies for %s", count, sites)) - } - } - ux.Info("Closing browser...") if err := proc.Process.Signal(syscall.SIGTERM); err != nil { ux.Debugf("SIGTERM failed: %v, sending SIGKILL", err) @@ -232,196 +231,278 @@ func openExtractAndClose(profile, storageStatePath string, urls []string, noSync } select { case <-done: - ux.Debugf("Chromium exited gracefully") + ux.Debugf("browser exited gracefully") case <-time.After(5 * time.Second): ux.Debugf("graceful shutdown timed out, killing") proc.Process.Kill() <-done } + if !noSync { + spMsg := "Extracting cookies" + if len(urls) > 0 { + spMsg = "Refreshing session and extracting cookies" + } + sp := ux.NewProgressSpinner(spMsg) + count, sites, err := extractCookiesViaCDP(bin, profile, storageStatePath, urls) + if err != nil { + sp.Fail(fmt.Sprintf("cookie extraction failed: %v", err)) + } else { + sp.Success(fmt.Sprintf("Exported %d cookies for %s", count, sites)) + } + } + case err := <-done: if err != nil { - ux.Debugf("Chromium exited: %v", err) + ux.Debugf("browser exited: %v", err) } ux.Info("Browser closed.") if !noSync { ux.Warn("Browser closed before cookie extraction — no cookies synced.") } - } - - return nil -} - -// cdpCall makes a CDP HTTP 
request to the browser's debugging endpoint. -func cdpCall(method, path string, body io.Reader) ([]byte, error) { - url := "http://127.0.0.1:" + chromeDebugPort + path - req, err := http.NewRequest(method, url, body) - if err != nil { - return nil, err - } - if body != nil { - req.Header.Set("Content-Type", "application/json") - } - client := &http.Client{Timeout: 5 * time.Second} - resp, err := client.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - return io.ReadAll(resp.Body) -} -// cdpNavigateBlank navigates the first tab to about:blank via CDP so no -// site JavaScript is running during cookie extraction. -func cdpNavigateBlank() { - // Get first tab's webSocket debugger URL. - data, err := cdpCall("GET", "/json", nil) - if err != nil { - ux.Debugf("CDP /json failed: %v", err) - return } - var tabs []struct { - ID string `json:"id"` - } - if err := json.Unmarshal(data, &tabs); err != nil || len(tabs) == 0 { - ux.Debugf("CDP no tabs found") - return - } - - // Navigate first tab to about:blank via HTTP endpoint. - _, err = cdpCall("GET", "/json/navigate/"+tabs[0].ID+"?url=about:blank", nil) - if err != nil { - ux.Debugf("CDP navigate failed: %v", err) - } - // Small delay for navigation to complete. - time.Sleep(200 * time.Millisecond) + return nil } -// extractCookiesViaCDP connects to Chrome's DevTools Protocol HTTP endpoint -// and retrieves all cookies with decrypted values. Writes storage-state.json. -func extractCookiesViaCDP(dstPath string) (int, string, error) { - // CDP HTTP API: /json/protocol doesn't expose Network.getAllCookies directly. - // But we can use the /json/new endpoint to get a debugging target, then use - // the HTTP-based CDP commands. Actually, the simplest approach is to use - // the /json endpoint to list pages, then use fetch to call CDP via - // the page's DevTools URL. - - // Simpler: use Chrome's built-in /json endpoints and a JavaScript evaluation - // approach. 
But the cleanest is: Chrome exposes cookies at a hidden endpoint. - - // Actually the simplest reliable approach: use the CDP WebSocket. - // But for simplicity, let's use the chrome.debugger HTTP API. - - // The most practical approach: use /json to get a target, then use - // the CDP REST-like endpoint: POST to send CDP command. - - // Let's use the approach of evaluating JS via CDP to get cookies. - // This works because we navigated to about:blank. - - // Get targets. - data, err := cdpCall("GET", "/json", nil) - if err != nil { - return 0, "", fmt.Errorf("CDP connection failed (is Chrome running?): %w", err) +// extractCookiesViaCDP launches a headless Chrome against the same profile with +// --remote-debugging-port, calls Network.getAllCookies via a Node.js WebSocket +// script, writes storage-state.json, then kills the headless instance. +// CDP is safe here: it runs after the login session ends, so bot detection +// (Kasada/Cloudflare) never sees the debugging port. +// If urls is non-empty, the headless browser navigates to urls[0] first so the +// server can re-issue short-lived auth tokens (e.g. Hyatt's 5-min oscar JWT) +// before cookies are extracted. +func extractCookiesViaCDP(bin, profile, storageStatePath string, urls []string) (int, string, error) { + const cdpPort = "9222" + + // Phase 2: headless CDP browser — same profile, no visible window. + cdpArgv := []string{ + "--user-data-dir=" + profile, + "--no-first-run", + "--no-default-browser-check", + "--headless=new", + "--remote-debugging-port=" + cdpPort, + "about:blank", } + ux.Debugf("CDP browser args: %s", strings.Join(cdpArgv, " ")) - var targets []struct { - WebSocketDebuggerURL string `json:"webSocketDebuggerUrl"` - ID string `json:"id"` - Type string `json:"type"` + cdpProc := exec.Command(bin, cdpArgv...) 
+ if ux.Verbose { + cdpProc.Stderr = os.Stderr } - if err := json.Unmarshal(data, &targets); err != nil { - return 0, "", fmt.Errorf("parse CDP targets: %w", err) + if err := cdpProc.Start(); err != nil { + return 0, "", fmt.Errorf("start CDP browser: %w", err) } + defer func() { + cdpProc.Process.Kill() + cdpProc.Wait() + }() - // Find a page target. - var targetID string - for _, t := range targets { - if t.Type == "page" { - targetID = t.ID + // Wait for CDP to be ready (poll /json/version). + cdpBase := "http://localhost:" + cdpPort + var wsURL string + for i := 0; i < 20; i++ { + time.Sleep(300 * time.Millisecond) + data, err := cdpGet(cdpBase + "/json") + if err != nil { + ux.Debugf("CDP not ready yet: %v", err) + continue + } + var targets []struct { + WebSocketDebuggerURL string `json:"webSocketDebuggerUrl"` + Type string `json:"type"` + } + if err := json.Unmarshal(data, &targets); err != nil { + continue + } + for _, t := range targets { + if t.Type == "page" && t.WebSocketDebuggerURL != "" { + wsURL = t.WebSocketDebuggerURL + break + } + } + if wsURL != "" { break } } - if targetID == "" { - return 0, "", fmt.Errorf("no page target found in CDP") + if wsURL == "" { + return 0, "", fmt.Errorf("CDP not ready after timeout") } + ux.Debugf("CDP WebSocket: %s", wsURL) - // Use the CDP HTTP protocol command endpoint. - // Chrome DevTools Protocol over HTTP: we need WebSocket for commands. - // The simpler alternative: use an external tool like `chrome-remote-interface` - // or just shell out to a small script. + return extractCookiesViaScript(wsURL, storageStatePath, urls) +} - // Simplest reliable approach: use Node.js (available on macOS) to connect - // via WebSocket and call Network.getAllCookies. - return extractCookiesViaScript(targets[0].WebSocketDebuggerURL, dstPath) +// cdpGet performs an HTTP GET to the CDP endpoint. 
+func cdpGet(url string) ([]byte, error) { + out, err := exec.Command("curl", "-sf", "--max-time", "2", url).Output() + if err != nil { + return nil, err + } + return out, nil } // extractCookiesViaScript uses a Node.js one-liner to connect to Chrome CDP // WebSocket and extract all cookies via Network.getAllCookies. -func extractCookiesViaScript(wsURL, dstPath string) (int, string, error) { - // Check if Node.js is available (it is on macOS). +// If urls is non-empty, it navigates to urls[0] first so the server can +// re-issue short-lived auth tokens before the cookie snapshot is taken. +func extractCookiesViaScript(wsURL, dstPath string, urls []string) (int, string, error) { nodePath, err := exec.LookPath("node") if err != nil { return 0, "", fmt.Errorf("node not found (required for CDP cookie extraction): %w", err) } ux.Debugf("using node: %s", nodePath) - ux.Debugf("CDP WebSocket: %s", wsURL) - // Node.js 22+ has built-in WebSocket (no npm packages needed). + navigateTo := "" + if len(urls) > 0 { + navigateTo = urls[0] + } + + // Node.js 22+ has built-in WebSocket — no npm packages needed. + // Extracts cookies + localStorage from the active Chrome profile via CDP. + // If navigateTo is set: enables Page events, navigates to the URL, waits for + // loadEventFired so the server can refresh short-lived tokens (e.g. Hyatt oscar), + // then extracts cookies AND localStorage for every frame origin on the page. + // Output is Playwright storage-state JSON format (cookies + origins[].localStorage). 
script := fmt.Sprintf(` const ws = new WebSocket(%q); +const navigateTo = %q; + +let cookies = null; +let origins = null; // set after Page.getFrameTree response +let lsData = {}; // origin -> [{name,value}] +let lsPending = 0; // outstanding DOMStorage requests + +function tryDone() { + if (cookies === null || origins === null || lsPending > 0) return; + const state = { + cookies, + origins: origins + .filter(o => lsData[o] && lsData[o].length > 0) + .map(o => ({origin: o, localStorage: lsData[o]})) + }; + process.stdout.write(JSON.stringify(state)); + ws.close(); +} + +function fetchAll() { + ws.send(JSON.stringify({id:20, method:'Network.getAllCookies'})); + if (navigateTo) { + ws.send(JSON.stringify({id:15, method:'DOMStorage.enable'})); + ws.send(JSON.stringify({id:30, method:'Page.getFrameTree'})); + } else { + origins = []; + tryDone(); + } +} + ws.onopen = () => { - ws.send(JSON.stringify({id: 1, method: 'Network.getAllCookies'})); + if (navigateTo) { + ws.send(JSON.stringify({id:1, method:'Page.enable'})); + } else { + fetchAll(); + } }; + ws.onmessage = (event) => { - const msg = JSON.parse(event.data); - if (msg.id === 1) { - const cookies = (msg.result && msg.result.cookies) || []; - const state = { - cookies: cookies.map(c => ({ - name: c.name, - value: c.value, + const m = JSON.parse(event.data); + + if (m.id === 1) { + // Page.enable done — navigate; 20s safety fallback if load event never fires. + ws.send(JSON.stringify({id:2, method:'Page.navigate', params:{url:navigateTo}})); + setTimeout(() => { if (cookies === null) fetchAll(); }, 20000); + return; + } + + if (m.method === 'Page.loadEventFired') { + fetchAll(); + return; + } + + if (m.id === 20) { + // Network.getAllCookies response + const raw = (m.result && m.result.cookies) || []; + cookies = raw.map(c => { + const ss = c.sameSite || 'Lax'; + const secure = (ss === 'None') ? 
true : !!c.secure; + return { + name: c.name, value: c.value, domain: c.domain, path: c.path, expires: c.expires === -1 ? -1 : c.expires, - httpOnly: c.httpOnly, - secure: c.secure, - sameSite: (!c.secure && (!c.sameSite || c.sameSite === "None")) ? "Lax" : (c.sameSite || "Lax") - })), - origins: [] - }; - process.stdout.write(JSON.stringify(state)); - ws.close(); + httpOnly: !!c.httpOnly, secure, + sameSite: (!secure && ss === 'None') ? 'Lax' : ss + }; + }); + tryDone(); + return; + } + + if (m.id === 30 && m.result) { + // Page.getFrameTree — collect unique https/http origins from all frames. + const seen = new Set(); + function collect(node) { + if (node && node.frame && node.frame.url) { + try { + const u = new URL(node.frame.url); + if ((u.protocol === 'https:' || u.protocol === 'http:') && !seen.has(u.origin)) { + seen.add(u.origin); + } + } catch(e) {} + } + (node.childFrames || []).forEach(collect); + } + collect(m.result.frameTree); + origins = [...seen]; + lsPending = origins.length; + if (lsPending === 0) { tryDone(); return; } + origins.forEach((o, i) => { + ws.send(JSON.stringify({id: 100+i, method:'DOMStorage.getDOMStorageItems', + params:{storageId:{securityOrigin:o, isLocalStorage:true}}})); + }); + return; + } + + if (m.id >= 100 && origins && m.id < 100 + origins.length) { + // DOMStorage.getDOMStorageItems response for origins[id-100]. 
+ const i = m.id - 100; + const entries = (m.result && m.result.entries) || []; + lsData[origins[i]] = entries.map(([name, value]) => ({name, value})); + lsPending--; + tryDone(); + return; } }; -ws.onerror = (e) => { process.stderr.write(String(e.message || e)); process.exit(1); }; -`, wsURL) + +ws.onerror = (e) => { process.stderr.write(String(e.message||e)); process.exit(1); }; +`, wsURL, navigateTo) cmd := exec.Command(nodePath, "-e", script) - cmd.Stderr = os.Stderr + if ux.Verbose { + cmd.Stderr = os.Stderr + } out, err := cmd.Output() if err != nil { return 0, "", fmt.Errorf("CDP script failed: %w", err) } - // Validate the output is valid JSON. var state storageState if err := json.Unmarshal(out, &state); err != nil { return 0, "", fmt.Errorf("invalid CDP output: %w", err) } - // Atomic write. tmpFile := dstPath + ".tmp" formatted, _ := json.MarshalIndent(state, "", " ") if err := os.WriteFile(tmpFile, formatted, 0600); err != nil { - return 0, "", fmt.Errorf("write temp file: %w", err) + return 0, "", fmt.Errorf("write: %w", err) } if err := os.Rename(tmpFile, dstPath); err != nil { os.Remove(tmpFile) return 0, "", fmt.Errorf("rename: %w", err) } - // Build domain list. domainSet := make(map[string]bool) for _, c := range state.Cookies { domainSet[c.Domain] = true @@ -430,7 +511,6 @@ ws.onerror = (e) => { process.stderr.write(String(e.message || e)); process.exit for d := range domainSet { domains = append(domains, d) } - return len(state.Cookies), strings.Join(domains, ", "), nil } @@ -456,44 +536,91 @@ func isURL(s string) bool { const fingerprintFile = "playwright-fingerprint.json" -// Default Playwright UA — matches patchright's bundled Chromium 141 with -// the stealth init script's Windows spoofing. Updated when queried from -// a running container. 
-const defaultPlaywrightUA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36" +// playwrightFingerprint holds the full browser fingerprint saved for Patchright. +type playwrightFingerprint struct { + UserAgent string `json:"userAgent"` + Platform string `json:"platform"` // "MacIntel" + UAPlatform string `json:"uaPlatform"` // "macOS" + Version string `json:"version"` // e.g. "147.0.7453.0" + Brands []fpBrand `json:"brands"` +} + +type fpBrand struct { + Brand string `json:"brand"` + Version string `json:"version"` +} + +// chromeFingerprint runs ` --version` to get the real version (e.g. "Google Chrome 147.0.7453.0") +// and builds a full macOS fingerprint. Chrome always reports 10_15_7 regardless of actual macOS +// version — that's Chrome's own fingerprinting behaviour, not a spoof. +// Returns nil on error. +func chromeFingerprint(bin string) *playwrightFingerprint { + out, err := exec.Command(bin, "--version").Output() + if err != nil { + return nil + } + // Output: "Google Chrome 147.0.7453.0\n" or "Chromium 147.0.7453.0\n" + parts := strings.Fields(strings.TrimSpace(string(out))) + if len(parts) == 0 { + return nil + } + version := parts[len(parts)-1] + major := version + if idx := strings.Index(version, "."); idx >= 0 { + major = version[:idx] + } + ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/" + version + " Safari/537.36" + return &playwrightFingerprint{ + UserAgent: ua, + Platform: "MacIntel", + UAPlatform: "macOS", + Version: version, + Brands: []fpBrand{ + {Brand: "Google Chrome", Version: major}, + {Brand: "Chromium", Version: major}, + {Brand: "Not/A)Brand", Version: "8"}, + }, + } +} -// getPlaywrightUA tries to get the UA from a running container via docker exec, -// falls back to the known default. Saves to fingerprint file for future use. 
-func getPlaywrightUA(storageStatePath string) string { +func ensureFingerprint(bin, storageStatePath string) *playwrightFingerprint { cellHome := filepath.Dir(storageStatePath) - ua := defaultPlaywrightUA - ux.Debugf("using Playwright UA: %s", ua) - savePlaywrightFingerprint(cellHome, ua) - return ua + fp := chromeFingerprint(bin) + if fp == nil { + // Fallback: generic recent macOS Chrome fingerprint — matches Client Hints platform. + fp = &playwrightFingerprint{ + UserAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36", + Platform: "MacIntel", + UAPlatform: "macOS", + Version: "147.0.0.0", + Brands: []fpBrand{ + {Brand: "Google Chrome", Version: "147"}, + {Brand: "Chromium", Version: "147"}, + {Brand: "Not/A)Brand", Version: "8"}, + }, + } + } + ux.Debugf("fingerprint UA: %s", fp.UserAgent) + savePlaywrightFingerprint(cellHome, fp) + return fp } -// readPlaywrightFingerprint reads the cached Playwright UA string from -// $CELL_HOME/playwright-fingerprint.json. Returns empty string if not found. -func readPlaywrightFingerprint(cellHome string) string { +func readPlaywrightFingerprint(cellHome string) *playwrightFingerprint { data, err := os.ReadFile(filepath.Join(cellHome, fingerprintFile)) if err != nil { - return "" - } - var fp struct { - UserAgent string `json:"userAgent"` + return nil } + var fp playwrightFingerprint if err := json.Unmarshal(data, &fp); err != nil { - return "" + return nil + } + if fp.UserAgent == "" { + return nil } - return fp.UserAgent + return &fp } -// savePlaywrightFingerprint writes Playwright's fingerprint to -// $CELL_HOME/playwright-fingerprint.json. Called on first run when no -// fingerprint exists yet — queries a running Playwright via httpbin. 
-func savePlaywrightFingerprint(cellHome, ua string) { - fp := struct { - UserAgent string `json:"userAgent"` - }{UserAgent: ua} +func savePlaywrightFingerprint(cellHome string, fp *playwrightFingerprint) { data, _ := json.MarshalIndent(fp, "", " ") path := filepath.Join(cellHome, fingerprintFile) tmpFile := path + ".tmp" diff --git a/cmd/claude.go b/cmd/claude.go index 02a3a34..b78f579 100644 --- a/cmd/claude.go +++ b/cmd/claude.go @@ -1,11 +1,15 @@ package main import ( + "context" "fmt" "os" + "strings" + "time" "github.com/DimmKirr/devcell/internal/cfg" "github.com/DimmKirr/devcell/internal/config" + "github.com/DimmKirr/devcell/internal/ollama" "github.com/spf13/cobra" ) @@ -22,6 +26,10 @@ Use --ollama to route Claude Code through a local ollama instance to point at ollama on the host. Can also be enabled permanently via use_ollama = true in the [llm] section of devcell.toml. +The model is resolved in order: + 1. [llm.models] default in devcell.toml (e.g. "ollama/qwen3:30b") + 2. Best-ranked model from the running ollama instance (auto-detect) + Examples: cell claude @@ -34,18 +42,22 @@ Examples: } // claudeEnv returns extra env vars for the claude container. -// When --ollama flag or [claude] use_ollama=true is set, it injects -// env vars that redirect Claude Code's API calls to a local ollama instance. +// When --ollama flag or [llm] use_ollama=true is set, it injects env vars +// that redirect Claude Code's API calls to a local ollama instance and +// sets ANTHROPIC_MODEL to the configured or best-available model. func claudeEnv() map[string]string { dbg := scanFlag("--debug") useOllama := scanFlag("--ollama") - if !useOllama { - c, err := config.LoadFromOS() - if err == nil { - cellCfg := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) + // Always load config — needed for both use_ollama and model selection. 
+ var configModel string + c, err := config.LoadFromOS() + if err == nil { + cellCfg := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) + if !useOllama { useOllama = cellCfg.LLM.UseOllama } + configModel = cellCfg.LLM.Models.Default } if !useOllama { @@ -55,9 +67,100 @@ func claudeEnv() map[string]string { if dbg { fmt.Fprintf(os.Stderr, " claude: ollama mode enabled, redirecting API to host ollama\n") } - return map[string]string{ + + env := map[string]string{ "ANTHROPIC_BASE_URL": "http://host.docker.internal:11434", "ANTHROPIC_AUTH_TOKEN": "ollama", "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1", } + + if model := resolveOllamaModel(configModel, dbg); model != "" { + env["ANTHROPIC_MODEL"] = model + } + + return env +} + +// resolveOllamaModel returns the bare ollama model name to use as ANTHROPIC_MODEL. +// Priority: config [llm.models] default > best-ranked model from running ollama. +// Returns "" if no model can be determined (ollama unreachable, no models). +func resolveOllamaModel(configModel string, dbg bool) string { + if configModel != "" { + // Strip "ollama/" prefix produced by FormatActiveTOMLSnippet. + model := strings.TrimPrefix(configModel, "ollama/") + if dbg { + if model != configModel { + fmt.Fprintf(os.Stderr, " claude: model from config: %s (stripped ollama/ prefix from %q)\n", model, configModel) + } else { + fmt.Fprintf(os.Stderr, " claude: model from config: %s\n", model) + } + } + return model + } + + // Auto-detect: probe local ollama and pick the best-ranked model. 
+ if dbg { + fmt.Fprintf(os.Stderr, " claude: no model in config — auto-selecting from local ollama\n") + } + + ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + + if !ollama.Detect(ctx, ollama.DefaultBaseURL) { + if dbg { + fmt.Fprintf(os.Stderr, " claude: ollama not reachable at %s — no model set\n", ollama.DefaultBaseURL) + } + return "" + } + if dbg { + fmt.Fprintf(os.Stderr, " claude: ollama reachable at %s\n", ollama.DefaultBaseURL) + } + + models, err := ollama.FetchModels(ctx, ollama.DefaultBaseURL) + if err != nil { + if dbg { + fmt.Fprintf(os.Stderr, " claude: fetch models failed: %v\n", err) + } + return "" + } + if dbg { + fmt.Fprintf(os.Stderr, " claude: %d model(s) available\n", len(models)) + } + if len(models) == 0 { + return "" + } + + // Rank local models with real system RAM so the composite score + // penalises models that won't fit (same algo as `cell models`). + systemRAM := ollama.GetSystemRAMGB() + if dbg { + fmt.Fprintf(os.Stderr, " claude: system RAM %.0f GB — ranking by composite score (swe×0.6 + speed×0.25) × ram_fit\n", systemRAM) + } + + ranked := ollama.RankModels(models, 0, nil, nil, systemRAM, "") + if len(ranked) == 0 { + return "" + } + + if dbg { + fmt.Fprintf(os.Stderr, " claude: %d model(s) ranked (composite score = swe×0.6 + speed×0.25, ×0.1 if RAM tight):\n", len(ranked)) + for _, r := range ranked { + _, needed := ollama.CheckHardwareSafe(r.ParameterSize, systemRAM) + ramStr := "ok" + if needed > 0 && systemRAM > 0 && needed > systemRAM*0.75 { + ramStr = fmt.Sprintf("tight (%.0fGB needed, %.0fGB avail)", needed, systemRAM) + } else if needed > 0 { + ramStr = fmt.Sprintf("%.0fGB", needed) + } + fmt.Fprintf(os.Stderr, " claude: [%d] %-35s swe=%-5.1f speed=%-6.0f score=%.2f ram=%s\n", + r.Rank, r.Name, r.SWEScore, r.SpeedTPM, r.RecommendedScore, ramStr) + } + top := ranked[0] + fmt.Fprintf(os.Stderr, " claude: picking %s — highest score (%.2f: swe=%.1f, speed=%.0fT/m)\n", + top.Name, 
top.RecommendedScore, top.SWEScore, top.SpeedTPM) + } + + model := ranked[0].Name + fmt.Printf(" → ollama model: %s (set [llm.models] default in devcell.toml to pin)\n", model) + return model } diff --git a/cmd/claude_test.go b/cmd/claude_test.go index c5fa347..0ac8bcb 100644 --- a/cmd/claude_test.go +++ b/cmd/claude_test.go @@ -110,6 +110,119 @@ use_ollama = true } } +// TestClaude_OllamaConfigModel_WithPrefix verifies that [llm.models] default = "ollama/model" +// injects ANTHROPIC_MODEL with the prefix stripped. +func TestClaude_OllamaConfigModel_WithPrefix(t *testing.T) { + home := scaffoldedHome(t) + + cfgDir := filepath.Join(home, ".config", "devcell") + tomlContent := `[cell] +[llm] +use_ollama = true + +[llm.models] +default = "ollama/qwen3:30b" +` + if err := os.WriteFile(filepath.Join(cfgDir, "devcell.toml"), []byte(tomlContent), 0644); err != nil { + t.Fatal(err) + } + + cmd := exec.Command(binaryPath, "claude", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("claude --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "ANTHROPIC_MODEL=qwen3:30b") { + t.Errorf("expected ANTHROPIC_MODEL=qwen3:30b (prefix stripped), got:\n%s", argv) + } + if strings.Contains(argv, "ANTHROPIC_MODEL=ollama/") { + t.Errorf("ollama/ prefix should be stripped from ANTHROPIC_MODEL:\n%s", argv) + } +} + +// TestClaude_OllamaConfigModel_NoPrefix verifies that a model without "ollama/" prefix +// is passed through as-is. 
+func TestClaude_OllamaConfigModel_NoPrefix(t *testing.T) { + home := scaffoldedHome(t) + + cfgDir := filepath.Join(home, ".config", "devcell") + tomlContent := `[cell] +[llm] +use_ollama = true + +[llm.models] +default = "qwen3:30b" +` + if err := os.WriteFile(filepath.Join(cfgDir, "devcell.toml"), []byte(tomlContent), 0644); err != nil { + t.Fatal(err) + } + + cmd := exec.Command(binaryPath, "claude", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("claude --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "ANTHROPIC_MODEL=qwen3:30b") { + t.Errorf("expected ANTHROPIC_MODEL=qwen3:30b, got:\n%s", argv) + } +} + +// TestClaude_OllamaFlag_ConfigModel verifies that --ollama flag also picks up +// [llm.models] default from config (flag + config model should both work). +func TestClaude_OllamaFlag_ConfigModel(t *testing.T) { + home := scaffoldedHome(t) + + cfgDir := filepath.Join(home, ".config", "devcell") + tomlContent := `[cell] +[llm.models] +default = "ollama/deepseek-r1:32b" +` + if err := os.WriteFile(filepath.Join(cfgDir, "devcell.toml"), []byte(tomlContent), 0644); err != nil { + t.Fatal(err) + } + + cmd := exec.Command(binaryPath, "claude", "--ollama", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("claude --ollama --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if !strings.Contains(argv, "ANTHROPIC_MODEL=deepseek-r1:32b") { + t.Errorf("expected ANTHROPIC_MODEL=deepseek-r1:32b when --ollama + config model:\n%s", argv) + } +} + +// TestClaude_OllamaNoModel_NoAnthropicModel verifies that without a configured model +// and no reachable ollama, ANTHROPIC_MODEL is not injected. 
+func TestClaude_OllamaNoModel_NoAnthropicModel(t *testing.T) { + home := scaffoldedHome(t) + + // ollama not running in test env → auto-detect silently returns "" + cmd := exec.Command(binaryPath, "claude", "--ollama", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("claude --ollama --dry-run failed: %v\noutput: %s", err, out) + } + + argv := string(out) + if strings.Contains(argv, "ANTHROPIC_MODEL=") { + t.Errorf("ANTHROPIC_MODEL should not be set when no model configured and ollama unreachable:\n%s", argv) + } +} + // TestClaude_OllamaWithUserArgs verifies that --ollama + user args work together. func TestClaude_OllamaWithUserArgs(t *testing.T) { home := scaffoldedHome(t) diff --git a/cmd/initflow.go b/cmd/initflow.go index f53031a..076a198 100644 --- a/cmd/initflow.go +++ b/cmd/initflow.go @@ -338,7 +338,8 @@ func detectOllamaModels() string { if err != nil || len(models) == 0 { return "" } - ranked := ollama.RankModels(models, 10, nil, nil) + systemRAM := ollama.GetSystemRAMGB() + ranked := ollama.RankModels(models, 10, nil, nil, systemRAM, "") snippet := ollama.FormatActiveTOMLSnippet(ranked) if snippet != "" { fmt.Printf(" Detected ollama with %d models\n", len(ranked)) diff --git a/cmd/models.go b/cmd/models.go index 07f1c6f..baa4e23 100644 --- a/cmd/models.go +++ b/cmd/models.go @@ -3,9 +3,12 @@ package main import ( "context" "fmt" - "log/slog" + "sort" "strings" + "time" + "github.com/DimmKirr/devcell/internal/cache" + "github.com/DimmKirr/devcell/internal/cloudmodels" "github.com/DimmKirr/devcell/internal/ollama" "github.com/DimmKirr/devcell/internal/ux" "github.com/spf13/cobra" @@ -13,12 +16,15 @@ import ( // ModelEntry is the typed representation of a ranked model for JSON/YAML output. 
type ModelEntry struct { - Rank int `json:"rank" yaml:"rank"` - Name string `json:"name" yaml:"name"` - SWEScore float64 `json:"swe_score" yaml:"swe_score"` - Size string `json:"size" yaml:"size"` - Type string `json:"type" yaml:"type"` - Hardware string `json:"hardware" yaml:"hardware"` + Rank int `json:"rank" yaml:"rank"` + Name string `json:"name" yaml:"name"` + Provider string `json:"provider" yaml:"provider"` + SWEScore float64 `json:"swe_score" yaml:"swe_score"` + Size string `json:"size" yaml:"size"` + Type string `json:"type" yaml:"type"` + SpeedTPM float64 `json:"speed_tpm" yaml:"speed_tpm"` + Hardware string `json:"hardware" yaml:"hardware"` + RecommendedScore float64 `json:"recommended_score" yaml:"recommended_score"` } // Reuse shared styles from ux package. @@ -43,107 +49,131 @@ Falls back to built-in estimates if SWE-bench data is unavailable. Examples: - cell models + cell models # cloud + local downloaded (default) + cell models --source=local # local ollama models only + cell models --source=cloud # cloud models only cell models --debug`, RunE: func(cmd *cobra.Command, args []string) error { - debug, _ := cmd.Flags().GetBool("debug") - log := slog.Default() + applyOutputFlags() ctx := context.Background() baseURL := ollama.DefaultBaseURL - if debug { - log.Debug("Checking ollama at " + baseURL) - } - - if !ollama.Detect(ctx, baseURL) { - ux.Warn("Ollama not detected at " + baseURL) - ux.Info("Install ollama: https://ollama.com/download") - return nil - } + source, _ := cmd.Flags().GetString("source") + wantLocal := source == "local" || source == "all" + wantCloud := source == "cloud" || source == "all" - if debug { - log.Debug("Ollama reachable, fetching model list via SDK (GET /api/tags)") - } - - models, err := ollama.FetchModels(ctx, baseURL) - if err != nil { - return fmt.Errorf("fetch models: %w", err) + // Fetch local ollama models (best effort — command still shows cloud models without it). 
+ var models []ollama.Model + ux.Debugf("→ GET %s [%s]", baseURL, time.Now().Format("15:04:05.000")) + t0 := time.Now() + ollamaUp := wantLocal && ollama.Detect(ctx, baseURL) + ux.Debugf("← GET %s elapsed=%s reachable=%v", baseURL, time.Since(t0).Round(time.Millisecond), ollamaUp) + if ollamaUp { + ux.Debugf("→ GET %s/api/tags [%s]", baseURL, time.Now().Format("15:04:05.000")) + t0 = time.Now() + localModels, err := ollama.FetchModels(ctx, baseURL) + if err != nil { + return fmt.Errorf("fetch models: %w", err) + } + ux.Debugf("← GET %s/api/tags elapsed=%s items=%d", baseURL, time.Since(t0).Round(time.Millisecond), len(localModels)) + for _, m := range localModels { + ux.Debugf(" %s (size=%s, family=%s)", m.Name, m.ParameterSize, m.Family) + } + models = append(models, localModels...) } - if debug { - log.Debug(fmt.Sprintf("Fetched %d models from ollama", len(models))) - for _, m := range models { - log.Debug(fmt.Sprintf(" %s (size=%s, family=%s)", m.Name, m.ParameterSize, m.Family)) + // Fetch cloud models from OpenRouter (best effort). + if wantCloud { + ux.Debugf("→ GET %s cached=%v [%s]", cloudmodels.OpenRouterURL, cache.Has("openrouter-models.json", cloudmodels.OpenRouterCacheTTL), time.Now().Format("15:04:05.000")) + t0 = time.Now() + cloudRaw, cloudErr := cloudmodels.FetchProviderModels(ctx, cloudmodels.OpenRouterURL) + if cloudErr != nil { + ux.Debugf("← GET %s elapsed=%s error=%v", cloudmodels.OpenRouterURL, time.Since(t0).Round(time.Millisecond), cloudErr) + } else { + filtered := cloudmodels.FilterTrustedProviders(cloudmodels.FilterLatestGen(cloudRaw)) + ux.Debugf("← GET %s elapsed=%s items=%d after_filter=%d", cloudmodels.OpenRouterURL, time.Since(t0).Round(time.Millisecond), len(cloudRaw), len(filtered)) + models = append(models, filtered...) } } if len(models) == 0 { - ux.Warn("Ollama is running but no models installed.") - ux.Info("Pull a model: ollama pull deepseek-r1:32b") + switch source { + case "local": + ux.Warn("No local models found. 
Is ollama running? Try: ollama serve") + case "cloud": + ux.Warn("No cloud models found (OpenRouter unavailable).") + default: + ux.Warn("No models found (ollama not running and OpenRouter unavailable).") + } return nil } // Fetch live SWE-bench scores (falls back to hardcoded on failure). var sweScores map[string]float64 - if debug { - log.Debug(fmt.Sprintf("Fetching SWE-bench Verified leaderboard from %s", ollama.SWEBenchURL)) - } + ux.Debugf("→ GET %s cached=%v [%s]", ollama.SWEBenchURL, cache.Has("swebench-scores.json", ollama.SWEBenchCacheTTL), time.Now().Format("15:04:05.000")) + t0 = time.Now() sweScores, sweErr := ollama.FetchSWEBenchScores(ctx, ollama.SWEBenchURL) if sweErr != nil { - if debug { - log.Debug(fmt.Sprintf("SWE-bench fetch failed (using fallback ratings): %v", sweErr)) - } - } else if debug { - log.Debug(fmt.Sprintf("Fetched %d open-source model scores from SWE-bench Verified", len(sweScores))) - for model, score := range sweScores { - log.Debug(fmt.Sprintf(" %s → %.1f%%", model, score)) - } + ux.Debugf("← GET %s elapsed=%s error=%v", ollama.SWEBenchURL, time.Since(t0).Round(time.Millisecond), sweErr) + } else { + ux.Debugf("← GET %s elapsed=%s items=%d", ollama.SWEBenchURL, time.Since(t0).Round(time.Millisecond), len(sweScores)) } - // Fetch HuggingFace model info (best effort, per model) BEFORE ranking, - // so RankModels can use HF repo IDs for SWE-bench score matching. + // Fetch HuggingFace model info for local ollama models only (best effort). + // Cloud models (Provider != "") are not on HuggingFace, so skip them. 
hfInfoMap := make(map[string]ollama.HFModelInfo) for _, m := range models { + if m.Provider != "" && m.Provider != "ollama" { + continue // skip cloud models — not on HuggingFace + } family := ollama.ModelFamily(m.Name) if _, done := hfInfoMap[family]; done { continue } + ux.Debugf("→ GET %s/%s [%s]", ollama.HuggingFaceAPIURL, family, time.Now().Format("15:04:05.000")) + t0 = time.Now() info, hfErr := ollama.FetchHFModelInfo(ctx, ollama.HuggingFaceAPIURL, family) if hfErr != nil { - if debug { - log.Debug(fmt.Sprintf("HuggingFace lookup failed for %s: %v", family, hfErr)) - } + ux.Debugf("← GET %s/%s elapsed=%s error=%v", ollama.HuggingFaceAPIURL, family, time.Since(t0).Round(time.Millisecond), hfErr) continue } hfInfoMap[family] = info - if debug { - log.Debug(fmt.Sprintf("HuggingFace: %s → %s (tags: %v)", family, info.ModelID, info.Tags)) - } + ux.Debugf("← GET %s/%s elapsed=%s model_id=%s tags=%v", ollama.HuggingFaceAPIURL, family, time.Since(t0).Round(time.Millisecond), info.ModelID, info.Tags) } - ranked := ollama.RankModels(models, 10, sweScores, hfInfoMap) + // Detect system RAM for hardware check. + systemRAM := ollama.GetSystemRAMGB() + ux.Debugf("system RAM: %.1f GB", systemRAM) - if debug { - log.Debug("Ranking models (live SWE-bench scores where available, fallback estimates otherwise)") - log.Debug("Note: SWE-bench scores are for full-precision models with agentic scaffolding.") - log.Debug(" Quantized ollama variants will score lower in practice.") - log.Debug("Sources: https://www.swebench.com/ | https://epoch.ai/benchmarks/swe-bench-verified") - for _, r := range ranked { - if r.SWEScore > 0 { - log.Debug(fmt.Sprintf(" %s → %.1f%% [%s]", r.Name, r.SWEScore, r.ScoreSource)) - } else { - log.Debug(fmt.Sprintf(" %s → no rating data", r.Name)) - } + // Split into local and cloud model lists for separate ranking. 
+ var localModels, cloudModelsList []ollama.Model + for _, m := range models { + if m.Provider == "" || m.Provider == "ollama" { + localModels = append(localModels, m) + } else { + cloudModelsList = append(cloudModelsList, m) } } - // Detect system RAM for hardware check. - systemRAM := ollama.GetSystemRAMGB() - if debug { - log.Debug(fmt.Sprintf("System RAM: %.1f GB", systemRAM)) + rankedCloud := ollama.RankModels(cloudModelsList, 20, sweScores, nil, 0, "swe") + rankedLocal := ollama.RankModels(localModels, 0, sweScores, hfInfoMap, systemRAM, "") + + ux.Debugf("ranking: %d cloud models (by SWE score), %d local models (by recommended)", len(rankedCloud), len(rankedLocal)) + ux.Debugf("note: scores are for full-precision models with agentic scaffolding") + for _, r := range rankedCloud { + if r.SWEScore > 0 { + ux.Debugf(" cloud %s → %.1f%% [%s]", r.Name, r.SWEScore, r.ScoreSource) + } + } + for _, r := range rankedLocal { + if r.SWEScore > 0 { + ux.Debugf(" local %s → %.1f%% [%s]", r.Name, r.SWEScore, r.ScoreSource) + } else { + ux.Debugf(" local %s → no rating data", r.Name) + } } - renderModels(ranked, hfInfoMap, systemRAM) + renderModels(rankedCloud, rankedLocal, cloudModelsList, localModels, sweScores, hfInfoMap, systemRAM) if ux.OutputFormat == "text" { fmt.Println(modGray.Render(fmt.Sprintf("%*s", 70, fmt.Sprintf("ollama %s", baseURL)))) @@ -153,14 +183,15 @@ Examples: } else { ux.Info("Scores from SWE-bench Verified (full-model, not quantized).") } - ux.Info(fmt.Sprintf("Hardware: Q4 estimate vs %.0fGB RAM. --debug for details.", systemRAM)) - fmt.Println() - - snippet := ollama.FormatTOMLSnippet(ranked) - ux.Info(fmt.Sprintf("%d models found. Add to ~/.config/devcell/devcell.toml:", len(ranked))) - fmt.Println() - for _, line := range strings.Split(snippet, "\n") { - fmt.Printf(" %s\n", line) + if len(rankedLocal) > 0 { + ux.Info(fmt.Sprintf("Hardware: Q4 estimate vs %.0fGB RAM. 
--debug for details.", systemRAM)) + fmt.Println() + snippet := ollama.FormatTOMLSnippet(rankedLocal) + ux.Info(fmt.Sprintf("%d local models found. Add to ~/.config/devcell/devcell.toml:", len(rankedLocal))) + fmt.Println() + for _, line := range strings.Split(snippet, "\n") { + fmt.Printf(" %s\n", line) + } } fmt.Println() } @@ -169,13 +200,121 @@ Examples: }, } -// renderModels displays the ranked model list in the current OutputFormat. -// In json/yaml mode, prose (header, TOML snippet, footer) is suppressed. +// buildAllRows converts a combined ranked model list to table rows. +// Columns: #, Model, Source, Rating, Speed, Size, Type, Score +// Source is the provider name for cloud models, or "local" for ollama models. +func buildAllRows(ranked []ollama.RankedModel, hfInfoMap map[string]ollama.HFModelInfo, systemRAM float64) [][]string { + rows := make([][]string, 0, len(ranked)) + for _, r := range ranked { + rating := modGray.Render("-") + if r.SWEScore > 0 { + label := fmt.Sprintf("~%.0f%%", r.SWEScore) + if r.ScoreSource == "est" { + label += " " + modGray.Render("est") + } + rating = label + } + family := ollama.ModelFamily(r.Name) + taskLabel := "General" + if info, ok := hfInfoMap[family]; ok { + taskLabel = ollama.InferTaskLabel(info, r.Name) + } else { + taskLabel = ollama.InferTaskLabel(ollama.HFModelInfo{}, r.Name) + } + var source string + if r.Provider == "" || r.Provider == "ollama" { + source = modGreen.Render("local") + } else { + source = modGray.Render("cloud") + } + size := r.ParameterSize + if size == "" { + size = modGray.Render("-") + } + // For local models show RAM fit; cloud shows "-". 
+ if (r.Provider == "" || r.Provider == "ollama") && systemRAM > 0 { + paramsB := ollama.ParseParamSize(r.ParameterSize) + if paramsB > 0 { + needed := ollama.EstimateRAMGB(paramsB) + ratio := needed / systemRAM + switch { + case ratio > 1.00: + size = modRed.Render(fmt.Sprintf("%.0fGB!", needed)) + case ratio > 0.90: + size = modRed.Render(fmt.Sprintf("%.0fGB~", needed)) + case ratio > 0.75: + size = ux.StyleWarning.Render(fmt.Sprintf("%.0fGB?", needed)) + default: + size = modGreen.Render(fmt.Sprintf("%.0fGB", needed)) + } + } + } + rows = append(rows, []string{ + fmt.Sprintf("%d", r.Rank), + r.Name, + source, + rating, + modGray.Render(fmt.Sprintf("%.0fT/m", r.SpeedTPM)), + size, + taskLabel, + modGray.Render(fmt.Sprintf("%.1f", r.RecommendedScore)), + }) + } + return rows +} + +// mergeAndRank combines cloud and local ranked lists, sorts by the given key, +// and assigns fresh sequential ranks. sortBy matches the values accepted by +// ollama.RankModels: "swe", "speed", "size", or "" / "recommended". +func mergeAndRank(cloud, local []ollama.RankedModel, sortBy string) []ollama.RankedModel { + merged := make([]ollama.RankedModel, 0, len(cloud)+len(local)) + merged = append(merged, cloud...) + merged = append(merged, local...) 
+ sort.Slice(merged, func(i, j int) bool { + switch sortBy { + case "swe": + if merged[i].SWEScore != merged[j].SWEScore { + return merged[i].SWEScore > merged[j].SWEScore + } + case "speed": + if merged[i].SpeedTPM != merged[j].SpeedTPM { + return merged[i].SpeedTPM > merged[j].SpeedTPM + } + case "size": + si := ollama.ParseParamSize(merged[i].ParameterSize) + sj := ollama.ParseParamSize(merged[j].ParameterSize) + if si != sj { + return si > sj + } + } + // default / tiebreaker: recommended score + if merged[i].RecommendedScore != merged[j].RecommendedScore { + return merged[i].RecommendedScore > merged[j].RecommendedScore + } + return merged[i].Name < merged[j].Name + }) + for i := range merged { + merged[i].Rank = i + 1 + } + return merged +} + +// renderModels displays ranked cloud and local model lists in the current OutputFormat. +// In json/yaml mode, prose headers are suppressed. // Extracted for testability without a live ollama daemon. -func renderModels(ranked []ollama.RankedModel, hfInfoMap map[string]ollama.HFModelInfo, systemRAM float64) { +func renderModels( + rankedCloud []ollama.RankedModel, + rankedLocal []ollama.RankedModel, + allCloud []ollama.Model, + allLocal []ollama.Model, + sweScores map[string]float64, + hfInfoMap map[string]ollama.HFModelInfo, + systemRAM float64, +) { if ux.OutputFormat != "text" { - entries := make([]ModelEntry, 0, len(ranked)) - for _, r := range ranked { + all := append(rankedCloud, rankedLocal...) 
+ entries := make([]ModelEntry, 0, len(all)) + for _, r := range all { family := ollama.ModelFamily(r.Name) taskLabel := "General" if info, ok := hfInfoMap[family]; ok { @@ -184,83 +323,63 @@ func renderModels(ranked []ollama.RankedModel, hfInfoMap map[string]ollama.HFMod taskLabel = ollama.InferTaskLabel(ollama.HFModelInfo{}, r.Name) } hw := "" - if systemRAM > 0 { - ok, needed := ollama.CheckHardware(r.ParameterSize, systemRAM) - if needed > 0 { - if ok { - hw = fmt.Sprintf("OK (%.0fGB)", needed) - } else { - hw = fmt.Sprintf("%.0fGB needed", needed) + if r.Provider == "" || r.Provider == "ollama" { + if systemRAM > 0 { + ok, needed := ollama.CheckHardware(r.ParameterSize, systemRAM) + if needed > 0 { + if ok { + hw = fmt.Sprintf("OK (%.0fGB)", needed) + } else { + hw = fmt.Sprintf("%.0fGB needed", needed) + } } } + } else { + hw = "cloud" } size := r.ParameterSize if size == "" { size = "-" } + provider := r.Provider + if provider == "" { + provider = "ollama" + } entries = append(entries, ModelEntry{ - Rank: r.Rank, - Name: r.Name, - SWEScore: r.SWEScore, - Size: size, - Type: taskLabel, - Hardware: hw, + Rank: r.Rank, + Name: r.Name, + Provider: provider, + SWEScore: r.SWEScore, + Size: size, + Type: taskLabel, + SpeedTPM: r.SpeedTPM, + Hardware: hw, + RecommendedScore: r.RecommendedScore, }) } ux.PrintData(entries) return } - // Text mode: prose header + styled table. 
- fmt.Println() - fmt.Println(modBold.Render(" Local Models (ranked by SWE-Bench score)")) - fmt.Println() - - headers := []string{"#", "Model", "Rating", "Size", "Type", "Hardware"} - rows := make([][]string, 0, len(ranked)) - for _, r := range ranked { - score := modGray.Render("-") - if r.SWEScore > 0 { - label := fmt.Sprintf("~%.0f%%", r.SWEScore) - if r.ScoreSource != "" { - label += " " + modGray.Render(r.ScoreSource) - } - score = label - } - size := r.ParameterSize - if size == "" { - size = "-" - } - family := ollama.ModelFamily(r.Name) - taskLabel := "General" - if info, ok := hfInfoMap[family]; ok { - taskLabel = ollama.InferTaskLabel(info, r.Name) - } else { - taskLabel = ollama.InferTaskLabel(ollama.HFModelInfo{}, r.Name) - } - hwLabel := modGray.Render("-") - if systemRAM > 0 { - ok, needed := ollama.CheckHardware(r.ParameterSize, systemRAM) - if needed > 0 { - if ok { - hwLabel = modGreen.Render(fmt.Sprintf("OK (%.0fGB)", needed)) - } else { - hwLabel = modRed.Render(fmt.Sprintf("%.0fGB needed", needed)) - } - } - } - rows = append(rows, []string{ - fmt.Sprintf("%d", r.Rank), - r.Name, - score, - size, - taskLabel, - hwLabel, + // Text mode: single combined table ranked by recommended score. 
+ merged := mergeAndRank(rankedCloud, rankedLocal, "") + if len(merged) > 0 { + fmt.Println() + fmt.Println(modBold.Render(" Available Models (local + cloud, ranked by recommended score)")) + fmt.Println() + headers := []string{"#", "Model", "Source", "Rating", "Speed", "Size", "Type", "Score"} + rows := buildAllRows(merged, hfInfoMap, systemRAM) + ux.InteractiveTable(headers, rows, func(key ux.SortKey) [][]string { + sortBy := ux.SortKeyString(key) + reSortedCloud := ollama.RankModels(allCloud, 20, sweScores, nil, 0, sortBy) + reSortedLocal := ollama.RankModels(allLocal, 0, sweScores, hfInfoMap, systemRAM, sortBy) + reMerged := mergeAndRank(reSortedCloud, reSortedLocal, sortBy) + return buildAllRows(reMerged, hfInfoMap, systemRAM) }) } - ux.PrintTable(headers, rows) } func init() { modelsCmd.Flags().Bool("debug", false, "Show detailed detection and ranking logs") + modelsCmd.Flags().String("source", "all", "Filter models by source: local, cloud, all") } diff --git a/cmd/models_format_test.go b/cmd/models_format_test.go index f5c7b8b..5400c8e 100644 --- a/cmd/models_format_test.go +++ b/cmd/models_format_test.go @@ -15,13 +15,13 @@ func TestRenderModels_JSONOutputIsValidJSON(t *testing.T) { ux.OutputFormat = "json" defer func() { ux.OutputFormat = "text" }() - ranked := []ollama.RankedModel{ + local := []ollama.RankedModel{ {Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, {Model: ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, SWEScore: 28.0, Rank: 2, ScoreSource: "est"}, } out := captureStdoutMain(func() { - renderModels(ranked, map[string]ollama.HFModelInfo{}, 32.0) + renderModels(nil, local, nil, nil, nil, map[string]ollama.HFModelInfo{}, 32.0) }) var result []map[string]any @@ -37,12 +37,12 @@ func TestRenderModels_JSONContainsNameAndRank(t *testing.T) { ux.OutputFormat = "json" defer func() { ux.OutputFormat = "text" }() - ranked := []ollama.RankedModel{ + local := []ollama.RankedModel{ 
{Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, } out := captureStdoutMain(func() { - renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + renderModels(nil, local, nil, nil, nil, map[string]ollama.HFModelInfo{}, 0) }) var result []map[string]any @@ -63,16 +63,16 @@ func TestRenderModels_JSONSuppressesProseOutput(t *testing.T) { ux.OutputFormat = "json" defer func() { ux.OutputFormat = "text" }() - ranked := []ollama.RankedModel{ + local := []ollama.RankedModel{ {Model: ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, SWEScore: 28.0, Rank: 1}, } out := captureStdoutMain(func() { - renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + renderModels(nil, local, nil, nil, nil, map[string]ollama.HFModelInfo{}, 0) }) - // Prose like "Local Models", TOML snippet markers, "ollama" footer should NOT appear - if strings.Contains(out, "Local Models") { + // Prose like "Available Models", TOML snippet markers should NOT appear in JSON mode + if strings.Contains(out, "Available Models") { t.Errorf("json mode should suppress prose header, got: %q", out) } if strings.Contains(out, "[ollama]") { @@ -83,12 +83,12 @@ func TestRenderModels_JSONSuppressesProseOutput(t *testing.T) { func TestRenderModels_TextContainsModelName(t *testing.T) { ux.OutputFormat = "text" - ranked := []ollama.RankedModel{ + local := []ollama.RankedModel{ {Model: ollama.Model{Name: "deepseek-r1:32b", ParameterSize: "32B"}, SWEScore: 49.2, Rank: 1, ScoreSource: "SWE"}, } out := captureStdoutMain(func() { - renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + renderModels(nil, local, nil, nil, nil, map[string]ollama.HFModelInfo{}, 0) }) if !strings.Contains(out, "deepseek-r1:32b") { @@ -99,15 +99,15 @@ func TestRenderModels_TextContainsModelName(t *testing.T) { func TestRenderModels_TextIncludesProseHeader(t *testing.T) { ux.OutputFormat = "text" - ranked := []ollama.RankedModel{ + local := []ollama.RankedModel{ {Model: 
ollama.Model{Name: "qwen3:8b", ParameterSize: "8B"}, Rank: 1}, } out := captureStdoutMain(func() { - renderModels(ranked, map[string]ollama.HFModelInfo{}, 0) + renderModels(nil, local, nil, nil, nil, map[string]ollama.HFModelInfo{}, 0) }) - if !strings.Contains(out, "Local Models") { + if !strings.Contains(out, "Available Models") { t.Errorf("text mode should include prose header, got: %q", out) } } diff --git a/cmd/models_test.go b/cmd/models_test.go index f719a97..f7bd23d 100644 --- a/cmd/models_test.go +++ b/cmd/models_test.go @@ -7,19 +7,16 @@ import ( ) // TestModels_NoOllama verifies that `cell models` exits cleanly when -// ollama is not reachable (no error, informative message). +// ollama is not reachable. Cloud models from OpenRouter are shown instead. func TestModels_NoOllama(t *testing.T) { cmd := exec.Command(binaryPath, "models") out, err := cmd.CombinedOutput() if err != nil { t.Fatalf("cell models failed: %v\noutput: %s", err, out) } - - s := string(out) - if !strings.Contains(s, "not reachable") && !strings.Contains(s, "not detected") && - !strings.Contains(s, "No ollama") && !strings.Contains(s, "not found") { - t.Errorf("expected 'not reachable/detected' message, got:\n%s", s) - } + // Either cloud models are shown (normal case) or a "no models" warning + // appears if OpenRouter is also unreachable. Either way, exit code is 0. + _ = string(out) // command exits cleanly — that is the assertion } // TestModels_InHelp verifies the models command appears in --help output. 
diff --git a/cmd/opencode.go b/cmd/opencode.go index 2b65b64..7dd4e71 100644 --- a/cmd/opencode.go +++ b/cmd/opencode.go @@ -206,7 +206,7 @@ func autoDetectOllamaModels() cfg.LLMModelsSection { if dbg { fmt.Fprintf(os.Stderr, " opencode: found %d models from ollama\n", len(models)) } - ranked := ollama.RankModels(models, 0, nil, nil) + ranked := ollama.RankModels(models, 0, nil, nil, 0, "") if len(ranked) == 0 { return cfg.LLMModelsSection{} } diff --git a/cmd/rdp.go b/cmd/rdp.go index 086d722..05d5ca6 100644 --- a/cmd/rdp.go +++ b/cmd/rdp.go @@ -5,11 +5,13 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "runtime" "strings" "github.com/DimmKirr/devcell/internal/config" internalrdp "github.com/DimmKirr/devcell/internal/rdp" + "github.com/DimmKirr/devcell/internal/runner" "github.com/DimmKirr/devcell/internal/ux" "github.com/spf13/cobra" ) @@ -31,19 +33,15 @@ just the numeric suffix: func init() { rdpCmd.Flags().Bool("list", false, "list all running cell containers and their RDP ports") - rdpCmd.Flags().Bool("verbose", false, "show debug info for RDP port lookup") + rdpCmd.Flags().Bool("global", false, "include all projects (docker + vagrant), not just the current one") rdpCmd.Flags().Bool("fullscreen", false, "open RDP session in fullscreen mode") rdpCmd.Flags().String("viewer", "", "RDP viewer: freerdp (default), macrdp, royaltsx") } func runRDP(cmd *cobra.Command, args []string) error { applyOutputFlags() - verbose, _ := cmd.Flags().GetBool("verbose") - if verbose { - ux.Verbose = true - ux.LogPlainText = true - } list, _ := cmd.Flags().GetBool("list") + rdpGlobal, _ = cmd.Flags().GetBool("global") rdpFullscreen, _ = cmd.Flags().GetBool("fullscreen") rdpViewer, _ = cmd.Flags().GetString("viewer") @@ -57,6 +55,7 @@ func runRDP(cmd *cobra.Command, args []string) error { } var ( + rdpGlobal bool // set by --global flag rdpFullscreen bool // set by --fullscreen flag rdpViewer string // set by --viewer flag ) @@ -142,6 +141,81 @@ func openRoyalTSX(c config.Config, 
port string) error { return openURL(internalrdp.RoyalTSXUrl(port, c.HostUser, "rdp")) } +// collectRDPCells returns a map of appName→rdpPort for running cells. +// When global is false (default) only the current project's cell is returned. +// When global is true all docker cells and all vagrant VMs are included. +func collectRDPCells(c config.Config, global bool) map[string]string { + result := make(map[string]string) + rdpDebug("collectRDPCells: global=%v baseDir=%s buildDir=%s", global, c.BaseDir, c.BuildDir) + + if global { + // All docker cell containers + rdpDebug("docker: scanning all cell- containers") + out, err := exec.Command("docker", "ps", + "--filter", "name=cell-", + "--format", "{{.Names}}\t{{.Ports}}").Output() + if err != nil { + rdpDebug("docker ps error: %v", err) + } else { + rdpDebug("docker ps output (%d bytes): %s", len(out), bytes.TrimSpace(out)) + if dm, _ := internalrdp.ParseDockerPS(string(bytes.TrimSpace(out))); len(dm) > 0 { + for k, v := range dm { + rdpDebug("docker cell found: %s → %s", k, v) + result[k] = v + } + } + } + // All vagrant VMs via global-status + vagrant port (no file-system access) + rdpDebug("vagrant: running global-status") + vagrantCells := runner.VagrantRunningCells() + rdpDebug("vagrant global-status parsed: %d running .devcell VMs: %v", len(vagrantCells), vagrantCells) + for project, machineID := range vagrantCells { + rdpDebug("vagrant: querying port for machine %s (project %s)", machineID, project) + if port, ok := runner.VagrantMachinePort(machineID, "3389"); ok { + appName := "vagrant-" + project + rdpDebug("vagrant cell found: %s → %s", appName, port) + result[appName] = port + } else { + rdpDebug("vagrant: no RDP port for machine %s", machineID) + } + } + } else { + // Current project docker cells only — filter by project prefix (all cell IDs) + projectPrefix := "cell-" + filepath.Base(c.BaseDir) + "-" + rdpDebug("docker: scanning with filter name=%s", projectPrefix) + out, err := exec.Command("docker", 
"ps", + "--filter", "name="+projectPrefix, + "--format", "{{.Names}}\t{{.Ports}}").Output() + if err != nil { + rdpDebug("docker ps error: %v", err) + } else { + rdpDebug("docker ps output (%d bytes): %s", len(out), bytes.TrimSpace(out)) + if dm, _ := internalrdp.ParseDockerPS(string(bytes.TrimSpace(out))); len(dm) > 0 { + for k, v := range dm { + rdpDebug("docker cell found: %s → %s", k, v) + result[k] = v + } + } + } + // Current project vagrant VM only + rdpDebug("vagrant: checking buildDir=%s", c.BuildDir) + running := runner.VagrantIsRunning(c.BuildDir) + rdpDebug("vagrant: VagrantIsRunning=%v", running) + if running { + if port, ok := runner.VagrantReadForwardedPort(c.BuildDir, "rdp"); ok { + appName := "vagrant-" + filepath.Base(c.BaseDir) + rdpDebug("vagrant cell found: %s → %s", appName, port) + result[appName] = port + } else { + rdpDebug("vagrant: no RDP port found in Vagrantfile") + } + } + } + + rdpDebug("collectRDPCells result: %v", result) + return result +} + func rdpDefault() error { c, err := config.LoadFromOS() if err != nil { @@ -153,78 +227,37 @@ func rdpDefault() error { return openRDP(c, port) } - rdpDebug("basedir: %s", c.BaseDir) - rdpDebug("cellID: %s (computed port: %s)", c.CellID, c.RDPPort) + rdpDebug("basedir: %s cellID: %s rdpPort: %s", c.BaseDir, c.CellID, c.RDPPort) - // Strategy 1: exact label match - out, err := exec.Command("docker", "ps", - "--filter", "label=devcell.basedir="+c.BaseDir, - "--filter", "label=devcell.cellid="+c.CellID, - "--format", "{{.Names}}\t{{.Ports}}").Output() - if err == nil { - rdpDebug("label-exact docker ps output: %q", strings.TrimSpace(string(out))) - if m, _ := internalrdp.ParseDockerPS(string(bytes.TrimSpace(out))); len(m) > 0 { - for appName, port := range m { - rdpDebug("label-exact match: %s → %s", appName, port) - return openRDP(c, port) - } - } - } - - // Strategy 2: basedir-only label match - out, err = exec.Command("docker", "ps", - "--filter", "label=devcell.basedir="+c.BaseDir, - 
"--format", "{{.Names}}\t{{.Ports}}").Output() - if err == nil { - rdpDebug("label-dir docker ps output: %q", strings.TrimSpace(string(out))) - if m, _ := internalrdp.ParseDockerPS(string(bytes.TrimSpace(out))); len(m) > 0 { - if len(m) == 1 { - for appName, port := range m { - rdpDebug("label-dir single match: %s → %s", appName, port) - return openRDP(c, port) - } - } - selected, err := selectCell(m) - if err != nil { - return err - } - return openRDP(c, m[selected]) + cells := collectRDPCells(c, rdpGlobal) + var dockerCount, vagrantCount int + for name := range cells { + if strings.HasPrefix(name, "vagrant-") { + vagrantCount++ + } else { + dockerCount++ } } + rdpDebug("found %d cells: %d docker, %d vagrant — %v", len(cells), dockerCount, vagrantCount, cells) - // Strategy 3: bind-mount fallback - rdpDebug("no label match; falling back to bind-mount inspect") - allOut, err := exec.Command("docker", "ps", "-q", "--filter", "name=cell-").Output() - if err != nil || len(bytes.TrimSpace(allOut)) == 0 { - return fmt.Errorf("no running cell found for %q — run 'cell rdp --list' to see all", c.BaseDir) - } - ids := strings.Fields(string(bytes.TrimSpace(allOut))) - rdpDebug("inspecting %d containers: %v", len(ids), ids) - inspectOut, err := exec.Command("docker", append([]string{"inspect"}, ids...)...).Output() - if err != nil { - return fmt.Errorf("docker inspect: %w", err) - } - matches, err := internalrdp.FindContainersByBind(string(inspectOut), c.BaseDir) - if err != nil { - return fmt.Errorf("parse inspect: %w", err) - } - rdpDebug("bind-mount matches: %+v", matches) - switch len(matches) { + switch len(cells) { case 0: return fmt.Errorf("no running cell found for %q — run 'cell rdp --list' to see all", c.BaseDir) case 1: - return openRDP(c, matches[0].Port) - default: - bindM := make(map[string]string, len(matches)) - for _, m := range matches { - bindM[m.AppName] = m.Port + for name, port := range cells { + rdpDebug("auto-selecting only cell: %s (port %s)", name, 
port) + return openRDP(c, port) } - selected, err := selectCell(bindM) + default: + rdpDebug("multiple cells — showing picker") + selected, err := selectCell(cells) if err != nil { return err } - return openRDP(c, bindM[selected]) + rdpDebug("selected: %s (port %s)", selected, cells[selected]) + return openRDP(c, cells[selected]) } + return nil } func rdpDebug(format string, args ...any) { @@ -234,17 +267,11 @@ func rdpDebug(format string, args ...any) { } func rdpList() error { - out, err := exec.Command("docker", "ps", - "--filter", "name=cell-", - "--format", "{{.Names}}\t{{.Ports}}").Output() - if err != nil { - return fmt.Errorf("docker ps: %w", err) - } - m, err := internalrdp.ParseDockerPS(string(bytes.TrimSpace(out))) + c, err := config.LoadFromOS() if err != nil { return err } - return renderRDPList(m) + return renderRDPList(collectRDPCells(c, rdpGlobal)) } // renderRDPList renders the RDP container map in the current OutputFormat. @@ -272,6 +299,14 @@ func rdpApp(appName string) error { if err != nil { return err } + // Vagrant cell: name has "vagrant-" prefix + if strings.HasPrefix(appName, "vagrant-") { + if !runner.VagrantIsRunning(c.BuildDir) { + return fmt.Errorf("vagrant VM %q is not running", appName) + } + return openRDP(c, c.RDPPort) + } + // Docker cell containerName := "cell-" + appName + "-run" out, err := exec.Command("docker", "inspect", containerName).Output() if err != nil { diff --git a/cmd/rdp_format_test.go b/cmd/rdp_format_test.go index f146912..be9492e 100644 --- a/cmd/rdp_format_test.go +++ b/cmd/rdp_format_test.go @@ -107,3 +107,61 @@ func TestRenderRDPList_URLIncludedInJSON(t *testing.T) { t.Errorf("url should contain port 3389, got %q", url) } } + +// L0: vagrant-named entries render correctly — renderRDPList is pure (no I/O). 
+ +func TestRenderRDPList_VagrantEntryText(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{"vagrant-myproject": "40589"} + + out := captureStdoutMain(func() { renderRDPList(m) }) + + if !strings.Contains(out, "vagrant-myproject") { + t.Errorf("text output must contain vagrant app name, got: %q", out) + } + if !strings.Contains(out, "40589") { + t.Errorf("text output must contain vagrant RDP port, got: %q", out) + } +} + +func TestRenderRDPList_VagrantEntryJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + m := map[string]string{"vagrant-myproject": "40589"} + + out := captureStdoutMain(func() { renderRDPList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if result[0]["app_name"] != "vagrant-myproject" { + t.Errorf("want app_name=vagrant-myproject, got %q", result[0]["app_name"]) + } + if result[0]["port"] != "40589" { + t.Errorf("want port=40589, got %q", result[0]["port"]) + } +} + +func TestRenderRDPList_MixedDockerAndVagrant(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{ + "cell-myproject-3-run": "389", + "vagrant-myproject": "40589", + } + + out := captureStdoutMain(func() { renderRDPList(m) }) + + if !strings.Contains(out, "cell-myproject-3-run") { + t.Errorf("text output must contain docker app name, got: %q", out) + } + if !strings.Contains(out, "vagrant-myproject") { + t.Errorf("text output must contain vagrant app name, got: %q", out) + } +} diff --git a/cmd/root.go b/cmd/root.go index 715aeb7..e31a903 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -204,19 +204,41 @@ func runAgent(binary string, defaultFlags, userArgs []string, extraEnv map[strin } } - // Vagrant engine branch — stub, not yet implemented + // Vagrant engine branch + // Priority: CLI flag > [cell] config > 
default. + cellCfgForEngine := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) engine := scanStringFlag("--engine") + if engine == "" { + engine = cellCfgForEngine.Cell.Engine + } if scanFlag("--macos") { engine = "vagrant" } if engine == "vagrant" { vagrantBox := scanStringFlag("--vagrant-box") - if err := scaffold.ScaffoldVagrantfile(c.ConfigDir, vagrantBox, ""); err != nil { - fmt.Fprintf(os.Stderr, "warning: vagrantfile scaffold failed: %v\n", err) + if vagrantBox == "" { + vagrantBox = cellCfgForEngine.Cell.VagrantBox } - fmt.Fprintln(os.Stderr, "Vagrant engine is not yet implemented.") - fmt.Fprintf(os.Stderr, "Vagrantfile scaffolded at: %s/Vagrantfile\n", c.ConfigDir) - return nil + if vagrantBox == "" { + vagrantBox = "utm/bookworm" + } + vagrantProvider := scanStringFlag("--vagrant-provider") + if vagrantProvider == "" { + vagrantProvider = cellCfgForEngine.Cell.VagrantProvider + } + if vagrantProvider == "" { + vagrantProvider = "utm" + } + cellCfgForVagrant := cellCfgForEngine + return runVagrantAgent( + binary, defaultFlags, userArgs, + c.BuildDir, c.BaseDir, + cellCfgForVagrant, + vagrantBox, vagrantProvider, + c.VNCPort, c.RDPPort, + c.HostHome, + scanFlag("--dry-run"), + ) } cellCfg := cfg.LoadFromOS(c.ConfigDir, c.BaseDir) @@ -482,8 +504,12 @@ func buildImageWithSpinner(configDir string, noCache bool, label string, silent sp := ux.NewProgressSpinner(label) if err := runner.BuildImage(ctx, configDir, noCache, ux.Verbose, out); err != nil { sp.Fail(label + " failed") - if !ux.Verbose && buf.Len() > 0 { - fmt.Fprint(os.Stderr, buf.String()) + if !ux.Verbose { + if hint := ux.ClassifyBuildOutput(buf.String()); hint != nil { + ux.PrintBuildErrorHint(hint) + } else if buf.Len() > 0 { + fmt.Fprint(os.Stderr, buf.String()) + } } return err } diff --git a/cmd/vagrant_runner.go b/cmd/vagrant_runner.go new file mode 100644 index 0000000..06a13f3 --- /dev/null +++ b/cmd/vagrant_runner.go @@ -0,0 +1,328 @@ +package main + +import ( + "context" + "fmt" + "os" + "os/exec" 
+ "path/filepath" + "strings" + "time" + + "github.com/DimmKirr/devcell/internal/cfg" + "github.com/DimmKirr/devcell/internal/runner" + "github.com/DimmKirr/devcell/internal/scaffold" +) + +// runVagrantAgent is the vagrant-engine equivalent of the docker runAgent path. +// It: +// 1. Scaffolds a Linux Vagrantfile in vagrantDir (idempotent) +// 2. Ensures the VM is up (skipped in dry-run mode) +// 3. Execs: vagrant ssh -- -t [env KEY=VAL...] +// with cmd.Dir=vagrantDir so vagrant locates the correct Vagrantfile +// stackNeedsGUI reports whether the stack + modules configuration includes +// desktop/GUI components. Only "ultimate" and "electronics" stacks include +// the desktop module; it can also be added explicitly via extra modules. +func stackNeedsGUI(stack string, modules []string) bool { + switch stack { + case "ultimate", "electronics": + return true + } + for _, m := range modules { + if m == "desktop" { + return true + } + } + return false +} + +func runVagrantAgent( + binary string, + defaultFlags, userArgs []string, + configDir, baseDir string, + cellCfg cfg.CellConfig, + vagrantBox, provider string, + vncPort, rdpPort string, + hostHome string, + dryRun bool, +) error { + vagrantDir := configDir + + // Resolve nixhome path (used for Vagrantfile template, stack.nix generation, and upload). + // Prefer the local nixhome/ in the project root; fall back to vagrantDir/nixhome/ if present. + nixhomePath := resolveVagrantNixhome(baseDir) + if nixhomePath == "" { + if _, err := os.Stat(filepath.Join(vagrantDir, "nixhome")); err == nil { + nixhomePath = filepath.Join(vagrantDir, "nixhome") + } + } + + // Resolve devcell config dir for synced folder (same as what Docker mounts + // as /etc/devcell/config). Use DEVCELL_CONFIG_DIR if set, else ~/.config/devcell. + vmConfigDir := os.Getenv("DEVCELL_CONFIG_DIR") + if vmConfigDir == "" { + vmConfigDir = filepath.Join(hostHome, ".config", "devcell") + } + + stack := cellCfg.Cell.ResolvedStack() + + // 1. 
Scaffold Linux Vagrantfile (idempotent — skips if already exists). + if err := scaffold.ScaffoldLinuxVagrantfile( + vagrantDir, vagrantBox, provider, stack, + baseDir, nixhomePath, vncPort, rdpPort, + hostHome, vmConfigDir, + ); err != nil { + fmt.Fprintf(os.Stderr, "warning: vagrantfile scaffold failed: %v\n", err) + } + + // 1b. Generate hosts/linux/stack.nix to reflect current stack + modules. + // Done before any upload so the generated file is included if provisioning runs. + if err := scaffold.ScaffoldVagrantLinuxStack(nixhomePath, stack, cellCfg.Cell.Modules); err != nil { + fmt.Fprintf(os.Stderr, "warning: stack.nix generation failed: %v\n", err) + } + + // 2. Ensure VM is up (no-op in dry-run mode). + if err := runner.VagrantEnsureUp(context.Background(), vagrantDir, provider, dryRun); err != nil { + return fmt.Errorf("vagrant up: %w", err) + } + + // 2b. Provision when needed — mirrors Docker's autoDetect/staleImage logic: + // --update flag → nix flake update inside VM, then provision + // --build flag → explicit rebuild requested + // binary absent → first run or broken provision (auto-detect) + needsUpdate := scanFlag("--update") + needsBuild := scanFlag("--build") || needsUpdate + if !needsBuild && !dryRun { + checkCtx, checkCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer checkCancel() + if !runner.VagrantBinaryExists(checkCtx, vagrantDir, binary) { + fmt.Printf(" %s not found in VM — provisioning automatically (this may take a while)\n", binary) + needsBuild = true + } + } + if needsBuild { + ctx := context.Background() + if err := runner.VagrantUploadNixhome(ctx, vagrantDir, nixhomePath, dryRun); err != nil { + fmt.Fprintf(os.Stderr, "warning: nixhome upload failed: %v\n", err) + } + if needsUpdate { + if err := vagrantFlakeUpdate(vagrantDir, dryRun); err != nil { + return err + } + } + if err := runner.VagrantProvision(ctx, vagrantDir, dryRun); err != nil { + return fmt.Errorf("vagrant provision: %w", err) + } + } + + // 
2c. Start GUI services when the stack includes desktop and GUI is enabled. + guiNeeded := cellCfg.Cell.ResolvedGUI() && stackNeedsGUI(stack, cellCfg.Cell.Modules) + if guiNeeded { + guiCtx, guiCancel := context.WithTimeout(context.Background(), 30*time.Second) + defer guiCancel() + if err := runner.VagrantEnsureGUI(guiCtx, vagrantDir, dryRun); err != nil { + fmt.Fprintf(os.Stderr, "warning: GUI startup failed: %v\n", err) + } + } + + // 3. Build the argv and exec (or print for dry-run). + envVars := buildVagrantEnvVars(cellCfg) + // Inject host-side forwarded ports so `cell rdp`/`cell vnc` inside the VM + // can use the same EXT_* fast path that docker containers rely on. + if vncPort != "" { + envVars = append(envVars, "EXT_VNC_PORT="+vncPort) + } + if rdpPort != "" { + envVars = append(envVars, "EXT_RDP_PORT="+rdpPort) + } + if guiNeeded { + envVars = append(envVars, "DISPLAY=:99") + } + spec := runner.VagrantSpec{ + Binary: binary, + DefaultFlags: defaultFlags, + UserArgs: userArgs, + VagrantDir: vagrantDir, + Provider: provider, + EnvVars: envVars, + ProjectDir: baseDir, + } + argv := runner.BuildVagrantSSHArgv(spec) + + if dryRun { + fmt.Printf("(cd %q && %s)\n", vagrantDir, shellJoin(argv)) + return nil + } + + cmd := exec.Command(argv[0], argv[1:]...) + cmd.Dir = vagrantDir // vagrant ssh must run from the Vagrantfile directory + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + os.Exit(exitErr.ExitCode()) + } + return err + } + return nil +} + +// runVagrantBuild implements `cell build --engine=vagrant`. +// Analogous to buildImageWithSpinner for Docker: +// - cell build --engine=vagrant → vagrant provision (re-applies nixhome flake) +// - cell build --update --engine=vagrant → nix flake update inside VM, then provision +// +// nixhome source priority: +// 1. DEVCELL_NIXHOME_PATH env var (explicit override) +// 2. 
baseDir/nixhome/ local directory (project-local nixhome) +// 3. GitHub (provisioner fallback when /opt/nixhome is absent in VM) +func runVagrantBuild(vagrantDir, baseDir string, cellCfg cfg.CellConfig, update, dryRun bool) error { + ctx := context.Background() + + // Ensure VM is up before provisioning. + vagrantProvider := scanStringFlag("--vagrant-provider") + if vagrantProvider == "" { + vagrantProvider = cellCfg.Cell.VagrantProvider + } + if vagrantProvider == "" { + vagrantProvider = "utm" + } + if err := runner.VagrantEnsureUp(ctx, vagrantDir, vagrantProvider, dryRun); err != nil { + return fmt.Errorf("vagrant up: %w", err) + } + + // Generate stack.nix to reflect current stack + modules, then upload nixhome. + nixhomePath := resolveVagrantNixhome(baseDir) + stack := cellCfg.Cell.ResolvedStack() + if err := scaffold.ScaffoldVagrantLinuxStack(nixhomePath, stack, cellCfg.Cell.Modules); err != nil { + fmt.Fprintf(os.Stderr, "warning: stack.nix generation failed: %v\n", err) + } + if err := runner.VagrantUploadNixhome(ctx, vagrantDir, nixhomePath, dryRun); err != nil { + fmt.Fprintf(os.Stderr, "warning: nixhome upload failed: %v\n", err) + } + + // --update: run `nix flake update` inside the VM before provisioning. + if update { + if err := vagrantFlakeUpdate(vagrantDir, dryRun); err != nil { + return err + } + } + + // Run vagrant provision (re-applies the nixhome home-manager flake). + return runner.VagrantProvision(ctx, vagrantDir, dryRun) +} + +// vagrantFlakeUpdate runs `nix flake update` inside the VM. +// nixhome was uploaded to ~/nixhome; update is run there (falls back to ~/.config/home-manager). 
+func vagrantFlakeUpdate(vagrantDir string, dryRun bool) error { + updateCmd := "bash -l -c 'cd ~/nixhome 2>/dev/null || cd ~/.config/home-manager 2>/dev/null || true; nix flake update'" + if dryRun { + fmt.Printf("(cd %q && vagrant ssh -- -t %s)\n", vagrantDir, updateCmd) + return nil + } + cmd := exec.Command("vagrant", "ssh", "--", "-t", updateCmd) + cmd.Dir = vagrantDir + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("nix flake update in VM: %w", err) + } + return nil +} + +// resolveVagrantNixhome returns the local nixhome path to upload to the VM. +// Priority: DEVCELL_NIXHOME_PATH env var → baseDir/nixhome/ → "" (GitHub fallback). +func resolveVagrantNixhome(baseDir string) string { + if p := os.Getenv("DEVCELL_NIXHOME_PATH"); p != "" { + return p + } + if baseDir != "" { + local := filepath.Join(baseDir, "nixhome") + if _, err := os.Stat(local); err == nil { + return local + } + } + return "" +} + +// buildVagrantEnvVars collects the env vars to forward into the Vagrant VM, +// mirroring what Docker's BuildArgv passes as -e flags but skipping vars that +// are Docker/container-specific (APP_NAME, HOME, IS_SANDBOX, WORKSPACE, etc.). 
+func buildVagrantEnvVars(cellCfg cfg.CellConfig) []string { + var envs []string + e := func(k, v string) { + if v != "" { + envs = append(envs, k+"="+v) + } + } + + // Terminal type + e("TERM", os.Getenv("TERM")) + + // Git identity: host env > [git] toml > defaults + gitCfg := cellCfg.Git + hostGitEnv := os.Getenv("GIT_AUTHOR_NAME") != "" || + os.Getenv("GIT_AUTHOR_EMAIL") != "" || + os.Getenv("GIT_COMMITTER_NAME") != "" || + os.Getenv("GIT_COMMITTER_EMAIL") != "" + + if hostGitEnv { + e("GIT_AUTHOR_NAME", envOrDefault(os.Getenv("GIT_AUTHOR_NAME"), "DevCell")) + e("GIT_AUTHOR_EMAIL", envOrDefault(os.Getenv("GIT_AUTHOR_EMAIL"), "devcell@devcell.io")) + e("GIT_COMMITTER_NAME", envOrDefault(os.Getenv("GIT_COMMITTER_NAME"), "DevCell")) + e("GIT_COMMITTER_EMAIL", envOrDefault(os.Getenv("GIT_COMMITTER_EMAIL"), "devcell@devcell.io")) + } else if gitCfg.HasIdentity() { + e("GIT_AUTHOR_NAME", gitCfg.AuthorName) + e("GIT_AUTHOR_EMAIL", gitCfg.AuthorEmail) + e("GIT_COMMITTER_NAME", gitCfg.ResolvedCommitterName()) + e("GIT_COMMITTER_EMAIL", gitCfg.ResolvedCommitterEmail()) + } else { + e("GIT_AUTHOR_NAME", "DevCell") + e("GIT_AUTHOR_EMAIL", "devcell@devcell.io") + e("GIT_COMMITTER_NAME", "DevCell") + e("GIT_COMMITTER_EMAIL", "devcell@devcell.io") + } + + // Timezone: config wins, then host $TZ + if tz := cellCfg.Cell.Timezone; tz != "" { + e("TZ", tz) + } else { + e("TZ", os.Getenv("TZ")) + } + + // Locale: config wins, then host $LANG, then default + if loc := cellCfg.Cell.Locale; loc != "" { + e("LANG", loc) + e("LC_ALL", loc) + } else if loc := os.Getenv("LANG"); loc != "" && loc != "POSIX" && loc != "C" { + e("LANG", loc) + e("LC_ALL", loc) + } else { + envs = append(envs, "LANG=en_US.UTF-8", "LC_ALL=en_US.UTF-8") + } + + // Ollama base URL for codex --ollama + e("CODEX_OSS_BASE_URL", envOrDefault(os.Getenv("CODEX_OSS_BASE_URL"), "")) + + // cfg [env] entries + for k, v := range cellCfg.Env { + envs = append(envs, k+"="+v) + } + + // cfg [mise] entries → MISE_=value + 
for k, v := range cellCfg.Mise { + envs = append(envs, "MISE_"+strings.ToUpper(k)+"="+v) + } + + return envs +} + +// envOrDefault returns val if non-empty, else def. +func envOrDefault(val, def string) string { + if val != "" { + return val + } + return def +} diff --git a/cmd/vagrant_test.go b/cmd/vagrant_test.go index 2111841..59af206 100644 --- a/cmd/vagrant_test.go +++ b/cmd/vagrant_test.go @@ -25,27 +25,33 @@ func vagrantHome(t *testing.T) string { return home } -// TestEngineVagrant_PrintsStubWarning checks that --engine=vagrant prints a -// "not yet implemented" warning and exits 0 without printing docker argv. -func TestEngineVagrant_PrintsStubWarning(t *testing.T) { +// TestEngineVagrant_DryRunPrintsArgv checks that --engine=vagrant --dry-run +// prints a vagrant ssh argv and exits 0 (no "not yet implemented"). +func TestEngineVagrant_DryRunPrintsArgv(t *testing.T) { home := vagrantHome(t) cmd := exec.Command(binaryPath, "--engine=vagrant", "shell", "--dry-run") cmd.Dir = home cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) out, err := cmd.CombinedOutput() if err != nil { - t.Fatalf("expected exit 0 for vagrant stub, got: %v\noutput: %s", err, out) + t.Fatalf("expected exit 0, got: %v\noutput: %s", err, out) } s := string(out) - if !strings.Contains(strings.ToLower(s), "not yet implemented") { - t.Errorf("expected 'not yet implemented' in output, got:\n%s", s) + if !strings.Contains(s, "vagrant") { + t.Errorf("expected 'vagrant' in dry-run argv output, got:\n%s", s) + } + if !strings.Contains(s, "ssh") { + t.Errorf("expected 'ssh' in dry-run argv output, got:\n%s", s) + } + if strings.Contains(strings.ToLower(s), "not yet implemented") { + t.Errorf("dry-run should not print 'not yet implemented', got:\n%s", s) } if strings.Contains(s, "docker run") { - t.Errorf("vagrant stub should not print docker run argv, got:\n%s", s) + t.Errorf("vagrant engine should not print docker run argv, got:\n%s", s) } } -// TestEngineMacos_AliasForVagrant checks 
that --macos produces the same stub. +// TestEngineMacos_AliasForVagrant checks that --macos produces the same vagrant argv. func TestEngineMacos_AliasForVagrant(t *testing.T) { home := vagrantHome(t) cmd := exec.Command(binaryPath, "--macos", "shell", "--dry-run") @@ -53,29 +59,53 @@ func TestEngineMacos_AliasForVagrant(t *testing.T) { cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) out, err := cmd.CombinedOutput() if err != nil { - t.Fatalf("expected exit 0 for --macos stub, got: %v\noutput: %s", err, out) + t.Fatalf("expected exit 0 for --macos, got: %v\noutput: %s", err, out) } s := string(out) - if !strings.Contains(strings.ToLower(s), "not yet implemented") { - t.Errorf("expected 'not yet implemented' in output, got:\n%s", s) + if !strings.Contains(s, "vagrant") { + t.Errorf("expected 'vagrant' in --macos dry-run output, got:\n%s", s) } } -// TestEngineVagrant_ScaffoldsVagrantfile checks that running --engine=vagrant -// creates a Vagrantfile in the config directory. -func TestEngineVagrant_ScaffoldsVagrantfile(t *testing.T) { +// TestEngineVagrant_ScaffoldsLinuxVagrantfile checks that running --engine=vagrant +// creates a Vagrantfile in the project's .devcell/ directory (not global config). 
+func TestEngineVagrant_ScaffoldsLinuxVagrantfile(t *testing.T) { home := vagrantHome(t) - cfgDir := filepath.Join(home, ".config", "devcell") cmd := exec.Command(binaryPath, "--engine=vagrant", "shell", "--dry-run") cmd.Dir = home cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("expected exit 0 for vagrant stub, got: %v\noutput: %s", err, out) + t.Fatalf("expected exit 0, got: %v\noutput: %s", err, out) + } + + if _, err := os.Stat(filepath.Join(home, ".devcell", "Vagrantfile")); err != nil { + t.Errorf("Vagrantfile not created in .devcell/: %v", err) } +} + +// TestEngineVagrant_VagrantfileContainsLinuxProvisioner checks the Vagrantfile +// includes the Nix provisioner (linux template, not macOS one). +func TestEngineVagrant_VagrantfileContainsLinuxProvisioner(t *testing.T) { + home := vagrantHome(t) - if _, err := os.Stat(filepath.Join(cfgDir, "Vagrantfile")); err != nil { - t.Errorf("Vagrantfile not created in config dir: %v", err) + cmd := exec.Command(binaryPath, "--engine=vagrant", "shell", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("expected exit 0, got: %v\noutput: %s", err, out) + } + + data, err := os.ReadFile(filepath.Join(home, ".devcell", "Vagrantfile")) + if err != nil { + t.Fatalf("Vagrantfile not found: %v", err) + } + content := string(data) + if !strings.Contains(content, "nix") { + t.Errorf("expected Nix provisioner in Vagrantfile, got:\n%s", content) + } + if !strings.Contains(content, "home-manager") { + t.Errorf("expected home-manager in Vagrantfile, got:\n%s", content) } } @@ -83,16 +113,15 @@ func TestEngineVagrant_ScaffoldsVagrantfile(t *testing.T) { // into the Vagrantfile. 
func TestEngineVagrant_BoxNameSubstituted(t *testing.T) { home := vagrantHome(t) - cfgDir := filepath.Join(home, ".config", "devcell") cmd := exec.Command(binaryPath, "--engine=vagrant", "--vagrant-box=my-test-box", "shell", "--dry-run") cmd.Dir = home cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) if out, err := cmd.CombinedOutput(); err != nil { - t.Fatalf("expected exit 0 for vagrant stub, got: %v\noutput: %s", err, out) + t.Fatalf("expected exit 0, got: %v\noutput: %s", err, out) } - data, err := os.ReadFile(filepath.Join(cfgDir, "Vagrantfile")) + data, err := os.ReadFile(filepath.Join(home, ".devcell", "Vagrantfile")) if err != nil { t.Fatalf("Vagrantfile not found: %v", err) } @@ -100,3 +129,21 @@ func TestEngineVagrant_BoxNameSubstituted(t *testing.T) { t.Errorf("box name not substituted in Vagrantfile:\n%s", string(data)) } } + +// TestEngineVagrant_DryRunContainsVagrantDir checks that the dry-run argv +// includes the .devcell/ path (so vagrant knows where to find the Vagrantfile). 
+func TestEngineVagrant_DryRunContainsVagrantDir(t *testing.T) { + home := vagrantHome(t) + buildDir := filepath.Join(home, ".devcell") + + cmd := exec.Command(binaryPath, "--engine=vagrant", "shell", "--dry-run") + cmd.Dir = home + cmd.Env = append(os.Environ(), "CELL_ID=1", "HOME="+home) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("expected exit 0, got: %v\noutput: %s", err, out) + } + if !strings.Contains(string(out), buildDir) { + t.Errorf("expected .devcell dir %q in dry-run argv, got:\n%s", buildDir, string(out)) + } +} diff --git a/cmd/vnc.go b/cmd/vnc.go index c4f2b1c..1020c7a 100644 --- a/cmd/vnc.go +++ b/cmd/vnc.go @@ -5,11 +5,13 @@ import ( "fmt" "os" "os/exec" + "path/filepath" "runtime" "strings" "github.com/DimmKirr/devcell/internal/config" internalrdp "github.com/DimmKirr/devcell/internal/rdp" + "github.com/DimmKirr/devcell/internal/runner" "github.com/DimmKirr/devcell/internal/ux" internalvnc "github.com/DimmKirr/devcell/internal/vnc" "github.com/spf13/cobra" @@ -32,18 +34,14 @@ just the numeric suffix: func init() { vncCmd.Flags().Bool("list", false, "list all running cell containers and their VNC ports") - vncCmd.Flags().Bool("verbose", false, "show debug info for VNC port lookup") + vncCmd.Flags().Bool("global", false, "include all projects (docker + vagrant), not just the current one") vncCmd.Flags().String("viewer", "", "VNC viewer: royaltsx, tigervnc, screensharing (macOS)") } func runVNC(cmd *cobra.Command, args []string) error { applyOutputFlags() - verbose, _ := cmd.Flags().GetBool("verbose") - if verbose { - ux.Verbose = true - ux.LogPlainText = true - } list, _ := cmd.Flags().GetBool("list") + vncGlobal, _ = cmd.Flags().GetBool("global") vncViewer, _ = cmd.Flags().GetString("viewer") if list { @@ -55,6 +53,8 @@ func runVNC(cmd *cobra.Command, args []string) error { return vncDefault() } +var vncGlobal bool // set by --global flag + // vncViewer is set by the --viewer flag. 
var vncViewer string @@ -117,10 +117,86 @@ func openVNCScreenSharing(port string) error { return openURL(internalvnc.VNCUrl(port)) } +// collectVNCCells returns a unified map of appName→vncPort for all running cells: +// Docker containers (all cell- containers) and the project's vagrant VM (if running). +// collectVNCCells returns a map of appName→vncPort for running cells. +// When global is false (default) only the current project's cell is returned. +// When global is true all docker cells and all vagrant VMs are included. +func collectVNCCells(c config.Config, global bool) map[string]string { + result := make(map[string]string) + vncDebug("collectVNCCells: global=%v baseDir=%s buildDir=%s", global, c.BaseDir, c.BuildDir) + + if global { + // All docker cell containers + vncDebug("docker: scanning all cell- containers") + out, err := exec.Command("docker", "ps", + "--filter", "name=cell-", + "--format", "{{.Names}}\t{{.Ports}}").Output() + if err != nil { + vncDebug("docker ps error: %v", err) + } else { + vncDebug("docker ps output (%d bytes): %s", len(out), bytes.TrimSpace(out)) + if dm, _ := internalvnc.ParseDockerPS(string(bytes.TrimSpace(out))); len(dm) > 0 { + for k, v := range dm { + vncDebug("docker cell found: %s → %s", k, v) + result[k] = v + } + } + } + // All vagrant VMs via global-status + vagrant port (no file-system access) + vncDebug("vagrant: running global-status") + vagrantCells := runner.VagrantRunningCells() + vncDebug("vagrant global-status parsed: %d running .devcell VMs: %v", len(vagrantCells), vagrantCells) + for project, machineID := range vagrantCells { + vncDebug("vagrant: querying port for machine %s (project %s)", machineID, project) + if port, ok := runner.VagrantMachinePort(machineID, "5900"); ok { + appName := "vagrant-" + project + vncDebug("vagrant cell found: %s → %s", appName, port) + result[appName] = port + } else { + vncDebug("vagrant: no VNC port for machine %s", machineID) + } + } + } else { + // Current project docker 
cells only — filter by project prefix (all cell IDs) + projectPrefix := "cell-" + filepath.Base(c.BaseDir) + "-" + vncDebug("docker: scanning with filter name=%s", projectPrefix) + out, err := exec.Command("docker", "ps", + "--filter", "name="+projectPrefix, + "--format", "{{.Names}}\t{{.Ports}}").Output() + if err != nil { + vncDebug("docker ps error: %v", err) + } else { + vncDebug("docker ps output (%d bytes): %s", len(out), bytes.TrimSpace(out)) + if dm, _ := internalvnc.ParseDockerPS(string(bytes.TrimSpace(out))); len(dm) > 0 { + for k, v := range dm { + vncDebug("docker cell found: %s → %s", k, v) + result[k] = v + } + } + } + // Current project vagrant VM only + vncDebug("vagrant: checking buildDir=%s", c.BuildDir) + running := runner.VagrantIsRunning(c.BuildDir) + vncDebug("vagrant: VagrantIsRunning=%v", running) + if running { + if port, ok := runner.VagrantReadForwardedPort(c.BuildDir, "vnc"); ok { + appName := "vagrant-" + filepath.Base(c.BaseDir) + vncDebug("vagrant cell found: %s → %s", appName, port) + result[appName] = port + } else { + vncDebug("vagrant: no VNC port found in Vagrantfile") + } + } + } + + vncDebug("collectVNCCells result: %v", result) + return result +} + func vncDefault() error { // Fast path: EXT_VNC_PORT is injected at container start with the correct - // published host port. When set, we're inside a devcell container and can - // use it directly without any docker lookup. + // published host port. When set, we're inside a devcell container. if port := os.Getenv("EXT_VNC_PORT"); port != "" { vncDebug("EXT_VNC_PORT=%s (fast path)", port) return openVNC(port) @@ -130,79 +206,37 @@ func vncDefault() error { if err != nil { return err } - vncDebug("basedir: %s", c.BaseDir) - vncDebug("cellID: %s (computed port: %s)", c.CellID, c.VNCPort) - - // --- Strategy 1: exact label match (containers started with current code) --- - // Filter by both basedir AND cellid for an exact session match. 
- out, err := exec.Command("docker", "ps", - "--filter", "label=devcell.basedir="+c.BaseDir, - "--filter", "label=devcell.cellid="+c.CellID, - "--format", "{{.Names}}\t{{.Ports}}").Output() - if err == nil { - vncDebug("label-exact docker ps output: %q", strings.TrimSpace(string(out))) - if m, _ := internalvnc.ParseDockerPS(string(bytes.TrimSpace(out))); len(m) > 0 { - for appName, port := range m { - vncDebug("label-exact match: %s → %s", appName, port) - return openVNC(port) - } - } - } + vncDebug("basedir: %s cellID: %s vncPort: %s", c.BaseDir, c.CellID, c.VNCPort) - // --- Strategy 2: basedir-only label match (different session, same dir) --- - out, err = exec.Command("docker", "ps", - "--filter", "label=devcell.basedir="+c.BaseDir, - "--format", "{{.Names}}\t{{.Ports}}").Output() - if err == nil { - vncDebug("label-dir docker ps output: %q", strings.TrimSpace(string(out))) - if m, _ := internalvnc.ParseDockerPS(string(bytes.TrimSpace(out))); len(m) > 0 { - if len(m) == 1 { - for appName, port := range m { - vncDebug("label-dir single match: %s → %s", appName, port) - return openVNC(port) - } - } - selected, err := selectCell(m) - if err != nil { - return err - } - return openVNC(m[selected]) + cells := collectVNCCells(c, vncGlobal) + var dockerCount, vagrantCount int + for name := range cells { + if strings.HasPrefix(name, "vagrant-") { + vagrantCount++ + } else { + dockerCount++ } } + vncDebug("found %d cells: %d docker, %d vagrant — %v", len(cells), dockerCount, vagrantCount, cells) - // --- Strategy 3: bind-mount fallback (containers started before labels were added) --- - vncDebug("no label match; falling back to bind-mount inspect") - allOut, err := exec.Command("docker", "ps", "-q", "--filter", "name=cell-").Output() - if err != nil || len(bytes.TrimSpace(allOut)) == 0 { - return fmt.Errorf("no running cell found for %q — run 'cell vnc --list' to see all", c.BaseDir) - } - ids := strings.Fields(string(bytes.TrimSpace(allOut))) - vncDebug("inspecting %d 
containers: %v", len(ids), ids) - inspectOut, err := exec.Command("docker", append([]string{"inspect"}, ids...)...).Output() - if err != nil { - return fmt.Errorf("docker inspect: %w", err) - } - matches, err := internalvnc.FindContainersByBind(string(inspectOut), c.BaseDir) - if err != nil { - return fmt.Errorf("parse inspect: %w", err) - } - vncDebug("bind-mount matches: %+v", matches) - switch len(matches) { + switch len(cells) { case 0: return fmt.Errorf("no running cell found for %q — run 'cell vnc --list' to see all", c.BaseDir) case 1: - return openVNC(matches[0].Port) - default: - bindM := make(map[string]string, len(matches)) - for _, m := range matches { - bindM[m.AppName] = m.Port + for name, port := range cells { + vncDebug("auto-selecting only cell: %s (port %s)", name, port) + return openVNC(port) } - selected, err := selectCell(bindM) + default: + vncDebug("multiple cells — showing picker") + selected, err := selectCell(cells) if err != nil { return err } - return openVNC(bindM[selected]) + vncDebug("selected: %s (port %s)", selected, cells[selected]) + return openVNC(cells[selected]) } + return nil } // vncDebug prints a debug line when --verbose is active. @@ -213,17 +247,11 @@ func vncDebug(format string, args ...any) { } func vncList() error { - out, err := exec.Command("docker", "ps", - "--filter", "name=cell-", - "--format", "{{.Names}}\t{{.Ports}}").Output() - if err != nil { - return fmt.Errorf("docker ps: %w", err) - } - m, err := internalvnc.ParseDockerPS(string(bytes.TrimSpace(out))) + c, err := config.LoadFromOS() if err != nil { return err } - return renderVNCList(m) + return renderVNCList(collectVNCCells(c, vncGlobal)) } // renderVNCList renders the VNC container map in the current OutputFormat. 
@@ -247,6 +275,18 @@ func renderVNCList(m map[string]string) error { } func vncApp(appName string) error { + // Vagrant cell: name has "vagrant-" prefix + if strings.HasPrefix(appName, "vagrant-") { + c, err := config.LoadFromOS() + if err != nil { + return err + } + if !runner.VagrantIsRunning(c.BuildDir) { + return fmt.Errorf("vagrant VM %q is not running", appName) + } + return openVNC(c.VNCPort) + } + // Docker cell containerName := "cell-" + appName + "-run" out, err := exec.Command("docker", "inspect", containerName).Output() if err != nil { diff --git a/cmd/vnc_format_test.go b/cmd/vnc_format_test.go index 142dc16..2e87ea5 100644 --- a/cmd/vnc_format_test.go +++ b/cmd/vnc_format_test.go @@ -87,3 +87,61 @@ func TestRenderVNCList_URLIncludedInJSON(t *testing.T) { t.Errorf("url should contain port 5900, got %q", url) } } + +// L0: vagrant-named entries render correctly — renderVNCList is pure (no I/O). + +func TestRenderVNCList_VagrantEntryText(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{"vagrant-myproject": "40550"} + + out := captureStdoutMain(func() { renderVNCList(m) }) + + if !strings.Contains(out, "vagrant-myproject") { + t.Errorf("text output must contain vagrant app name, got: %q", out) + } + if !strings.Contains(out, "40550") { + t.Errorf("text output must contain vagrant VNC port, got: %q", out) + } +} + +func TestRenderVNCList_VagrantEntryJSON(t *testing.T) { + ux.OutputFormat = "json" + defer func() { ux.OutputFormat = "text" }() + + m := map[string]string{"vagrant-myproject": "40550"} + + out := captureStdoutMain(func() { renderVNCList(m) }) + + var result []map[string]string + if err := json.Unmarshal([]byte(out), &result); err != nil { + t.Fatalf("not valid JSON: %v\noutput: %q", err, out) + } + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d", len(result)) + } + if result[0]["app_name"] != "vagrant-myproject" { + t.Errorf("want app_name=vagrant-myproject, got %q", result[0]["app_name"]) + } + if result[0]["port"] 
!= "40550" { + t.Errorf("want port=40550, got %q", result[0]["port"]) + } +} + +func TestRenderVNCList_MixedDockerAndVagrant(t *testing.T) { + ux.OutputFormat = "text" + + m := map[string]string{ + "cell-myproject-3-run": "350", + "vagrant-myproject": "40550", + } + + out := captureStdoutMain(func() { renderVNCList(m) }) + + if !strings.Contains(out, "cell-myproject-3-run") { + t.Errorf("text output must contain docker app name, got: %q", out) + } + if !strings.Contains(out, "vagrant-myproject") { + t.Errorf("text output must contain vagrant app name, got: %q", out) + } +} diff --git a/internal/cache/cache.go b/internal/cache/cache.go new file mode 100644 index 0000000..346a4d0 --- /dev/null +++ b/internal/cache/cache.go @@ -0,0 +1,75 @@ +// Package cache provides TTL-based on-disk JSON caching for remote API responses. +// Cache files are stored under ~/.cache/devcell/ (or $XDG_CACHE_HOME/devcell/). +package cache + +import ( + "encoding/json" + "os" + "path/filepath" + "time" +) + +type entry[T any] struct { + CachedAt time.Time `json:"cached_at"` + Data T `json:"data"` +} + +// Dir returns the devcell cache directory. +// Respects $XDG_CACHE_HOME; falls back to ~/.cache/devcell. +func Dir() string { + if xdg := os.Getenv("XDG_CACHE_HOME"); xdg != "" { + return filepath.Join(xdg, "devcell") + } + home, _ := os.UserHomeDir() + return filepath.Join(home, ".cache", "devcell") +} + +// Load reads a cached value from disk. Returns (value, true) on a valid, +// non-expired cache hit; (zero, false) on a miss, parse error, or expiry. +func Load[T any](key string, ttl time.Duration) (T, bool) { + var zero T + data, err := os.ReadFile(filepath.Join(Dir(), key)) + if err != nil { + return zero, false + } + var e entry[T] + if err := json.Unmarshal(data, &e); err != nil { + return zero, false + } + if time.Since(e.CachedAt) > ttl { + return zero, false + } + return e.Data, true +} + +// Has returns true if a valid, non-expired cache entry exists for key. 
+// Useful for logging "cached vs network" before calling a fetch function. +func Has(key string, ttl time.Duration) bool { + data, err := os.ReadFile(filepath.Join(Dir(), key)) + if err != nil { + return false + } + // Only need the timestamp — unmarshal into a minimal struct. + var e struct { + CachedAt time.Time `json:"cached_at"` + } + if err := json.Unmarshal(data, &e); err != nil { + return false + } + return time.Since(e.CachedAt) <= ttl +} + +// Save writes a value to the cache. Silently ignores write errors so a +// read-only filesystem never breaks the calling command. +func Save[T any](key string, value T) { + dir := Dir() + if err := os.MkdirAll(dir, 0o755); err != nil { + return + } + e := entry[T]{CachedAt: time.Now(), Data: value} + data, err := json.Marshal(e) + if err != nil { + return + } + _ = os.WriteFile(filepath.Join(dir, key), data, 0o644) +} diff --git a/internal/cache/cache_test.go b/internal/cache/cache_test.go new file mode 100644 index 0000000..6cc5d97 --- /dev/null +++ b/internal/cache/cache_test.go @@ -0,0 +1,65 @@ +package cache_test + +import ( + "os" + "testing" + "time" + + "github.com/DimmKirr/devcell/internal/cache" +) + +func TestLoadSave_RoundTrip(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + + cache.Save("test.json", map[string]float64{"a": 1.5, "b": 2.0}) + + got, ok := cache.Load[map[string]float64]("test.json", time.Hour) + if !ok { + t.Fatal("expected cache hit") + } + if got["a"] != 1.5 || got["b"] != 2.0 { + t.Errorf("got %v", got) + } +} + +func TestLoad_MissWhenExpired(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + + cache.Save("expired.json", "hello") + + _, ok := cache.Load[string]("expired.json", -1*time.Second) // negative TTL = always expired + if ok { + t.Fatal("expected cache miss for expired entry") + } +} + +func TestLoad_MissWhenAbsent(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + + _, ok := cache.Load[string]("nonexistent.json", time.Hour) + if ok { + 
t.Fatal("expected cache miss for absent file") + } +} + +func TestLoad_MissOnCorruptFile(t *testing.T) { + dir := t.TempDir() + t.Setenv("XDG_CACHE_HOME", dir) + + _ = os.WriteFile(dir+"/devcell/corrupt.json", []byte("not-json"), 0o644) + _ = os.MkdirAll(dir+"/devcell", 0o755) + _ = os.WriteFile(dir+"/devcell/corrupt.json", []byte("not-json"), 0o644) + + _, ok := cache.Load[string]("corrupt.json", time.Hour) + if ok { + t.Fatal("expected cache miss for corrupt file") + } +} + +func TestDir_UsesXDGWhenSet(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", "/tmp/xdg-test") + got := cache.Dir() + if got != "/tmp/xdg-test/devcell" { + t.Errorf("want /tmp/xdg-test/devcell, got %s", got) + } +} diff --git a/internal/cfg/cfg.go b/internal/cfg/cfg.go index 5ef72d3..32f5657 100644 --- a/internal/cfg/cfg.go +++ b/internal/cfg/cfg.go @@ -15,14 +15,18 @@ const DefaultRegistry = "public.ecr.aws/w1l3v2k8/devcell" // CellSection holds [cell] config. type CellSection struct { - ImageTag string `toml:"image_tag"` - Registry string `toml:"registry"` // container registry; default: DefaultRegistry; env: DEVCELL_REGISTRY - GUI *bool `toml:"gui"` // default: true (nil = not set → true) - Timezone string `toml:"timezone"` // IANA tz (e.g. "Europe/Prague"); default: host $TZ - Locale string `toml:"locale"` // POSIX locale (e.g. "en_US.UTF-8"); default: "en_US.UTF-8" - Stack string `toml:"stack"` // nix stack name (e.g. "go", "python"); default: "ultimate" - Modules []string `toml:"modules"` // extra nix modules to compose on top of stack - NixhomePath string `toml:"nixhome"` // local nixhome path; overridden by DEVCELL_NIXHOME_PATH env + ImageTag string `toml:"image_tag"` + Registry string `toml:"registry"` // container registry; default: DefaultRegistry; env: DEVCELL_REGISTRY + GUI *bool `toml:"gui"` // default: true (nil = not set → true) + Timezone string `toml:"timezone"` // IANA tz (e.g. "Europe/Prague"); default: host $TZ + Locale string `toml:"locale"` // POSIX locale (e.g. 
"en_US.UTF-8"); default: "en_US.UTF-8" + Stack string `toml:"stack"` // nix stack name (e.g. "go", "python"); default: "ultimate" + Modules []string `toml:"modules"` // extra nix modules to compose on top of stack + NixhomePath string `toml:"nixhome"` // local nixhome path; overridden by DEVCELL_NIXHOME_PATH env + Engine string `toml:"engine"` // execution engine: "docker" (default) or "vagrant" + VagrantProvider string `toml:"vagrant_provider"` // vagrant provider: "utm" (default) or "libvirt" + VagrantBox string `toml:"vagrant_box"` // vagrant box name override (default: "utm/bookworm") + DockerPrivileged bool `toml:"docker_privileged"` // run container with --privileged; default: false } // ResolvedRegistry returns the effective registry: env > toml > default. @@ -235,6 +239,9 @@ func Merge(global, project CellConfig) CellConfig { if project.Cell.Modules != nil { out.Cell.Modules = project.Cell.Modules } + if project.Cell.DockerPrivileged { + out.Cell.DockerPrivileged = true + } // LLM: project wins for scalars, providers accumulate out.LLM = global.LLM diff --git a/internal/cloudmodels/openrouter.go b/internal/cloudmodels/openrouter.go new file mode 100644 index 0000000..2da9706 --- /dev/null +++ b/internal/cloudmodels/openrouter.go @@ -0,0 +1,184 @@ +package cloudmodels + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/DimmKirr/devcell/internal/cache" + "github.com/DimmKirr/devcell/internal/ollama" +) + +// OpenRouterURL is the public OpenRouter models endpoint (no auth required). +const OpenRouterURL = "https://openrouter.ai/api/v1/models" + +// openRouterResponse is the top-level response from GET /v1/models. 
+type openRouterResponse struct { + Data []openRouterModel `json:"data"` +} + +type openRouterModel struct { + ID string `json:"id"` + Name string `json:"name"` + Pricing openRouterPricing `json:"pricing"` +} + +type openRouterPricing struct { + Prompt string `json:"prompt"` + Completion string `json:"completion"` +} + +// OpenRouterCacheTTL is the on-disk cache lifetime for OpenRouter model listings. +const OpenRouterCacheTTL = time.Hour + +// FetchProviderModels fetches all models from OpenRouter and converts them +// to ollama.Model so they can be ranked alongside local models. +// Results are cached on-disk for 1 hour. +func FetchProviderModels(ctx context.Context, baseURL string) ([]ollama.Model, error) { + const cacheKey = "openrouter-models.json" + if models, ok := cache.Load[[]ollama.Model](cacheKey, OpenRouterCacheTTL); ok { + return models, nil + } + + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, baseURL, nil) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + req.Header.Set("HTTP-Referer", "https://github.com/DimmKirr/devcell") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetch openrouter models: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("openrouter returned %d", resp.StatusCode) + } + + var body openRouterResponse + if err := json.NewDecoder(resp.Body).Decode(&body); err != nil { + return nil, fmt.Errorf("decode openrouter response: %w", err) + } + + models := make([]ollama.Model, 0, len(body.Data)) + for _, m := range body.Data { + provider := extractProvider(m.ID) + completionPrice := parsePrice(m.Pricing.Completion) + models = append(models, ollama.Model{ + Name: m.ID, + Provider: provider, + CompletionPricePerToken: completionPrice, + }) + } + cache.Save(cacheKey, models) + return models, nil +} + +// TrustedProviders lists the 
cloud providers shown in `cell models`. +// Restricted to the major frontier labs to keep the list focused. +var TrustedProviders = []string{"anthropic", "openai", "google"} + +// FilterTrustedProviders removes models whose provider is not in TrustedProviders. +func FilterTrustedProviders(models []ollama.Model) []ollama.Model { + trusted := make(map[string]bool, len(TrustedProviders)) + for _, p := range TrustedProviders { + trusted[p] = true + } + out := make([]ollama.Model, 0, len(models)) + for _, m := range models { + if trusted[m.Provider] { + out = append(out, m) + } + } + return out +} + +// FilterLatestGen keeps only the highest-version model per family. +// Family is derived by stripping trailing version numbers from the model name part. +// E.g. "claude-opus-4-5" and "claude-opus-4" → family "claude-opus", keep "claude-opus-4-5". +func FilterLatestGen(models []ollama.Model) []ollama.Model { + type familyEntry struct { + model ollama.Model + version []int + } + + byFamily := make(map[string]familyEntry) + for _, m := range models { + family, version := parseModelFamily(m.Name) + if existing, ok := byFamily[family]; !ok || versionGreater(version, existing.version) { + byFamily[family] = familyEntry{model: m, version: version} + } + } + + result := make([]ollama.Model, 0, len(byFamily)) + for _, entry := range byFamily { + result = append(result, entry.model) + } + sort.Slice(result, func(i, j int) bool { + return result[i].Name < result[j].Name + }) + return result +} + +// versionTrailingRe matches trailing version parts like "-4-5", "-4", or "-4.6". +var versionTrailingRe = regexp.MustCompile(`([-\.]\d+)+$`) + +// versionSepRe splits version strings on "-" or ".". +var versionSepRe = regexp.MustCompile(`[-\.]`) + +// parseModelFamily splits "provider/model-name-4-5" into family="provider/model-name" +// and version=[4,5]. Handles both dash-separated (-4-5) and dot-separated (.6) versions. +// E.g. 
"anthropic/claude-opus-4.6" → family="anthropic/claude-opus", version=[4,6]. +func parseModelFamily(id string) (family string, version []int) { + loc := versionTrailingRe.FindStringIndex(id) + if loc == nil { + return id, nil + } + family = id[:loc[0]] + for _, part := range versionSepRe.Split(id[loc[0]:], -1) { + n, err := strconv.Atoi(part) + if err == nil { + version = append(version, n) + } + } + return family, version +} + +// versionGreater returns true if version a > b lexicographically. +func versionGreater(a, b []int) bool { + for i := 0; i < len(a) && i < len(b); i++ { + if a[i] != b[i] { + return a[i] > b[i] + } + } + return len(a) > len(b) +} + +func extractProvider(id string) string { + if idx := strings.Index(id, "/"); idx >= 0 { + return id[:idx] + } + return "" +} + +func parsePrice(s string) float64 { + s = strings.TrimSpace(s) + if s == "" || s == "0" { + return 0 + } + v, err := strconv.ParseFloat(s, 64) + if err != nil { + return 0 + } + return v +} diff --git a/internal/cloudmodels/openrouter_test.go b/internal/cloudmodels/openrouter_test.go new file mode 100644 index 0000000..c944be5 --- /dev/null +++ b/internal/cloudmodels/openrouter_test.go @@ -0,0 +1,131 @@ +package cloudmodels_test + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/DimmKirr/devcell/internal/cloudmodels" + "github.com/DimmKirr/devcell/internal/ollama" +) + +func TestFetchProviderModels_ParsesPricingAndName(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + resp := map[string]any{ + "data": []any{ + map[string]any{ + "id": "anthropic/claude-opus-4-5", + "name": "Claude Opus 4.5", + "pricing": map[string]any{ + "prompt": "0.000015", + "completion": "0.000075", + }, + }, + map[string]any{ + "id": "openai/gpt-4o", + "name": "GPT-4o", + "pricing": map[string]any{ + "prompt": "0.000005", + "completion": "0.000015", + }, + }, + }, + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + json.NewEncoder(w).Encode(resp) + })) + defer srv.Close() + + models, err := cloudmodels.FetchProviderModels(context.Background(), srv.URL) + if err != nil { + t.Fatalf("FetchProviderModels failed: %v", err) + } + if len(models) != 2 { + t.Fatalf("expected 2 models, got %d", len(models)) + } + m := models[0] + if m.Name != "anthropic/claude-opus-4-5" { + t.Errorf("expected name 'anthropic/claude-opus-4-5', got %q", m.Name) + } + if m.Provider != "anthropic" { + t.Errorf("expected provider 'anthropic', got %q", m.Provider) + } + if m.CompletionPricePerToken != 0.000075 { + t.Errorf("expected completion price 0.000075, got %v", m.CompletionPricePerToken) + } +} + +func TestFetchProviderModels_ReturnsError_WhenUnreachable(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + _, err := cloudmodels.FetchProviderModels(context.Background(), "http://127.0.0.1:0") + if err == nil { + t.Error("expected error for unreachable server") + } +} + +func TestFilterLatestGen_KeepsHighestSemver(t *testing.T) { + models := []ollama.Model{ + {Name: "anthropic/claude-opus-4", Provider: "anthropic"}, + {Name: "anthropic/claude-opus-4-5", Provider: "anthropic"}, + {Name: "anthropic/claude-sonnet-4-5", Provider: "anthropic"}, + } + filtered := cloudmodels.FilterLatestGen(models) + if len(filtered) != 2 { + t.Fatalf("expected 2 models after filtering, got %d (names: %v)", len(filtered), modelNames(filtered)) + } + foundOpus45 := false + for _, m := range filtered { + if m.Name == "anthropic/claude-opus-4-5" { + foundOpus45 = true + } + if m.Name == "anthropic/claude-opus-4" { + t.Errorf("expected older claude-opus-4 to be filtered out") + } + } + if !foundOpus45 { + t.Error("expected claude-opus-4-5 to be kept") + } +} + +func TestFilterLatestGen_DifferentFamiliesKeptSeparately(t *testing.T) { + models := []ollama.Model{ + {Name: "google/gemini-2-5-pro", Provider: "google"}, + {Name: "google/gemini-2-5-flash", Provider: "google"}, + } + filtered := 
cloudmodels.FilterLatestGen(models)
+	if len(filtered) != 2 {
+		t.Fatalf("expected 2 models (pro+flash are different families), got %d", len(filtered))
+	}
+}
+
+func TestFilterLatestGen_DotVersionDeduplicatesWithDash(t *testing.T) {
+	// claude-opus-4.6 (dot-version) should supersede claude-opus-4 (dash-only).
+	models := []ollama.Model{
+		{Name: "anthropic/claude-opus-4", Provider: "anthropic"},
+		{Name: "anthropic/claude-opus-4.6", Provider: "anthropic"},
+	}
+	filtered := cloudmodels.FilterLatestGen(models)
+	if len(filtered) != 1 {
+		t.Fatalf("expected 1 model after dedup, got %d (names: %v)", len(filtered), modelNames(filtered))
+	}
+	if filtered[0].Name != "anthropic/claude-opus-4.6" {
+		t.Errorf("expected claude-opus-4.6 to win, got %q", filtered[0].Name)
+	}
+}
+
+func TestFilterLatestGen_Empty(t *testing.T) {
+	filtered := cloudmodels.FilterLatestGen(nil)
+	if len(filtered) != 0 {
+		t.Errorf("expected empty result for nil input, got %d", len(filtered))
+	}
+}
+
+func modelNames(models []ollama.Model) []string {
+	names := make([]string, len(models))
+	for i, m := range models {
+		names[i] = m.Name
+	}
+	return names
+}
diff --git a/internal/cloudmodels/pipeline_test.go b/internal/cloudmodels/pipeline_test.go
new file mode 100644
index 0000000..730eb1b
--- /dev/null
+++ b/internal/cloudmodels/pipeline_test.go
@@ -0,0 +1,223 @@
+package cloudmodels_test
+
+// Integration tests for the full model-ranking pipeline:
+// OpenRouter models → FetchProviderModels → FilterLatestGen
+// + SWE-bench scores → FetchSWEBenchScores
+// + RankModels → top-N list
+//
+// These tests use real-format payloads from test/testdata/ served by in-process
+// HTTP servers to catch parsing regressions without network access.
+ +import ( + "context" + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/DimmKirr/devcell/internal/cloudmodels" + "github.com/DimmKirr/devcell/internal/ollama" +) + +// testdataDir returns the absolute path to test/testdata/ relative to this file. +func testdataDir() string { + _, file, _, _ := runtime.Caller(0) + // internal/cloudmodels/ → two levels up → test/testdata/ + return filepath.Join(filepath.Dir(file), "..", "..", "test", "testdata") +} + +// serveFile serves the contents of a testdata file over HTTP. +func serveFile(t *testing.T, name string) *httptest.Server { + t.Helper() + t.Setenv("XDG_CACHE_HOME", t.TempDir()) + data, err := os.ReadFile(filepath.Join(testdataDir(), name)) + if err != nil { + t.Fatalf("read testdata/%s: %v", name, err) + } + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write(data) + })) +} + +// TestPipeline_ProviderParsing validates that FetchProviderModels correctly +// extracts provider, name, and pricing from a real-shape OpenRouter response. 
+func TestPipeline_ProviderParsing(t *testing.T) { + srv := serveFile(t, "openrouter_models.json") + defer srv.Close() + + models, err := cloudmodels.FetchProviderModels(context.Background(), srv.URL) + if err != nil { + t.Fatalf("FetchProviderModels failed: %v", err) + } + + byName := make(map[string]ollama.Model) + for _, m := range models { + byName[m.Name] = m + } + + // Verify provider extraction + cases := []struct{ id, wantProvider string }{ + {"anthropic/claude-opus-4-5", "anthropic"}, + {"openai/gpt-4o", "openai"}, + {"google/gemini-2-5-pro", "google"}, + {"deepseek/deepseek-r1", "deepseek"}, + } + for _, c := range cases { + m, ok := byName[c.id] + if !ok { + t.Errorf("model %q not found in results", c.id) + continue + } + if m.Provider != c.wantProvider { + t.Errorf("%q: provider want %q got %q", c.id, c.wantProvider, m.Provider) + } + } + + // Verify completion price is parsed (non-zero for paid models) + claude := byName["anthropic/claude-opus-4-5"] + if claude.CompletionPricePerToken != 0.000075 { + t.Errorf("claude price: want 0.000075, got %v", claude.CompletionPricePerToken) + } + cheap := byName["openai/gpt-4o-mini"] + if cheap.CompletionPricePerToken != 0.0000006 { + t.Errorf("gpt-4o-mini price: want 0.0000006, got %v", cheap.CompletionPricePerToken) + } +} + +// TestPipeline_FilterLatestGen validates that version deduplication keeps +// the correct (highest-version) model per family. 
+func TestPipeline_FilterLatestGen(t *testing.T) { + srv := serveFile(t, "openrouter_models.json") + defer srv.Close() + + raw, err := cloudmodels.FetchProviderModels(context.Background(), srv.URL) + if err != nil { + t.Fatalf("FetchProviderModels failed: %v", err) + } + filtered := cloudmodels.FilterLatestGen(raw) + + byName := make(map[string]bool) + for _, m := range filtered { + byName[m.Name] = true + } + + // claude-opus-4-5 should survive, claude-opus-4 should be filtered + if !byName["anthropic/claude-opus-4-5"] { + t.Error("expected claude-opus-4-5 to survive (higher version)") + } + if byName["anthropic/claude-opus-4"] { + t.Error("expected claude-opus-4 to be filtered (older version)") + } + + // gemini-2-5-pro should survive; gemini-2-0-flash and gemini-2-5-flash have + // non-numeric suffixes so FilterLatestGen treats them as separate families — both survive. + if !byName["google/gemini-2-5-pro"] { + t.Error("expected gemini-2-5-pro to survive") + } + + // gpt-4o and gpt-4o-mini are different families (mini suffix changes family) + if !byName["openai/gpt-4o"] { + t.Error("expected gpt-4o to survive") + } +} + +// TestPipeline_SWEBenchScoreParsing validates FetchSWEBenchScores parses +// the real leaderboards.json format correctly. +func TestPipeline_SWEBenchScoreParsing(t *testing.T) { + srv := serveFile(t, "swebench_leaderboards.json") + defer srv.Close() + + scores, err := ollama.FetchSWEBenchScores(context.Background(), srv.URL) + if err != nil { + t.Fatalf("FetchSWEBenchScores failed: %v", err) + } + + // Direct tag match (e.g. 
"Model: claude-opus-4-5") + if s, ok := scores["claude-opus-4-5"]; !ok || s != 72.5 { + t.Errorf("claude-opus-4-5: want 72.5, got %v (ok=%v)", s, ok) + } + if s, ok := scores["gpt-4o"]; !ok || s != 57.4 { + t.Errorf("gpt-4o: want 57.4, got %v (ok=%v)", s, ok) + } + + // HuggingFace URL tag — should be stored under repo path AND model name + if s, ok := scores["deepseek-ai/deepseek-r1"]; !ok || s != 49.2 { + t.Errorf("deepseek-ai/deepseek-r1: want 49.2, got %v (ok=%v)", s, ok) + } + if s, ok := scores["deepseek-r1"]; !ok || s != 49.2 { + t.Errorf("deepseek-r1 (short name): want 49.2, got %v (ok=%v)", s, ok) + } + if s, ok := scores["qwen/qwen2.5-coder-32b-instruct"]; !ok || s != 35.0 { + t.Errorf("qwen repo path: want 35.0, got %v (ok=%v)", s, ok) + } +} + +// TestPipeline_EndToEnd validates the full pipeline: fetch → filter → rank. +// Checks that cloud models get correct SWE scores matched from SWE-bench data, +// and that no model inherits a score from an unrelated model via loose substring matching. 
+func TestPipeline_EndToEnd(t *testing.T) { + orSrv := serveFile(t, "openrouter_models.json") + defer orSrv.Close() + + sweSrv := serveFile(t, "swebench_leaderboards.json") + defer sweSrv.Close() + + raw, err := cloudmodels.FetchProviderModels(context.Background(), orSrv.URL) + if err != nil { + t.Fatalf("FetchProviderModels: %v", err) + } + models := cloudmodels.FilterLatestGen(raw) + + scores, err := ollama.FetchSWEBenchScores(context.Background(), sweSrv.URL) + if err != nil { + t.Fatalf("FetchSWEBenchScores: %v", err) + } + + ranked := ollama.RankModels(models, len(models), scores, nil, 0, "swe") + + byName := make(map[string]ollama.RankedModel) + for _, r := range ranked { + byName[r.Name] = r + } + + // claude-opus-4-5: SWE-bench tag is "claude-opus-4-5" — direct match expected + claude := byName["anthropic/claude-opus-4-5"] + if claude.SWEScore != 72.5 { + t.Errorf("claude-opus-4-5 SWE score: want 72.5, got %v (source=%q)", claude.SWEScore, claude.ScoreSource) + } + if claude.ScoreSource != "SWE" { + t.Errorf("claude-opus-4-5 score source: want SWE, got %q", claude.ScoreSource) + } + + // gpt-4o: SWE-bench tag is "gpt-4o" — direct match expected + gpt4o := byName["openai/gpt-4o"] + if gpt4o.SWEScore != 57.4 { + t.Errorf("gpt-4o SWE score: want 57.4, got %v", gpt4o.SWEScore) + } + + // gpt-4o-mini: no SWE-bench entry — must NOT inherit gpt-4o's score via substring + // "gpt-4o" is a substring of "gpt-4o-mini", loose matching would wrongly return 57.4 + mini := byName["openai/gpt-4o-mini"] + if mini.SWEScore == 57.4 { + t.Errorf("gpt-4o-mini wrongly inherited gpt-4o score (substring match too loose)") + } + + // deepseek-r1-0528: FilterLatestGen keeps this over deepseek-r1 (same family, higher version). + // Score matched via HF URL tag "deepseek-ai/DeepSeek-R1-0528" → NormalizeCloudID strategy. 
+ deepseek := byName["deepseek/deepseek-r1-0528"] + if deepseek.SWEScore != 57.6 { + t.Errorf("deepseek-r1-0528 SWE score: want 57.6, got %v (source=%q)", deepseek.SWEScore, deepseek.ScoreSource) + } + + // Validate ranked list is sorted by SWE score descending (swe sort) + for i := 1; i < len(ranked); i++ { + if ranked[i].SWEScore > ranked[i-1].SWEScore { + t.Errorf("rank %d (%s=%.1f) > rank %d (%s=%.1f): not sorted by SWE", + i+1, ranked[i].Name, ranked[i].SWEScore, + i, ranked[i-1].Name, ranked[i-1].SWEScore) + } + } +} diff --git a/internal/ollama/hardware.go b/internal/ollama/hardware.go index ebdcdde..818aabd 100644 --- a/internal/ollama/hardware.go +++ b/internal/ollama/hardware.go @@ -6,6 +6,47 @@ import ( "strings" ) +// appleSiliconBandwidth maps (generation, tier) → memory bandwidth in GB/s. +// Sources: Apple spec sheets and llama.cpp community benchmarks. +var appleSiliconBandwidth = map[int]map[string]float64{ + 1: {"": 68, "pro": 200, "max": 400, "ultra": 800}, + 2: {"": 100, "pro": 200, "max": 400, "ultra": 800}, + 3: {"": 100, "pro": 150, "max": 400, "ultra": 800}, + 4: {"": 120, "pro": 273, "max": 546, "ultra": 1092}, +} + +// ParseAppleSiliconBandwidth parses a CPU brand string like "Apple M4 Pro" +// and returns the corresponding memory bandwidth in GB/s, or 0 if unrecognised. +func ParseAppleSiliconBandwidth(brandString string) float64 { + const prefix = "Apple M" + if !strings.HasPrefix(brandString, prefix) { + return 0 + } + rest := brandString[len(prefix):] + + // Extract generation digits. + i := 0 + for i < len(rest) && rest[i] >= '0' && rest[i] <= '9' { + i++ + } + if i == 0 { + return 0 + } + gen, err := strconv.Atoi(rest[:i]) + if err != nil { + return 0 + } + + // Tier: "Pro", "Max", "Ultra", or "" (base) — normalised to lowercase. 
+ tier := strings.ToLower(strings.TrimSpace(rest[i:])) + + tiers, ok := appleSiliconBandwidth[gen] + if !ok { + return 0 + } + return tiers[tier] // 0 if tier unknown +} + // ParseParamSize parses a parameter size string like "32B" or "671M" into // billions of parameters. Returns 0 if unparseable. func ParseParamSize(s string) float64 { @@ -56,3 +97,15 @@ func CheckHardware(parameterSize string, systemRAMGB float64) (bool, float64) { return needed <= systemRAMGB, needed } +// CheckHardwareSafe checks if a model fits within 75% of available RAM. +// Uses a conservative threshold: a 48 GB model won't run on 48 GB RAM. +// Returns (ok, neededGB). If parameter size is unknown, returns (true, 0). +func CheckHardwareSafe(parameterSize string, systemRAMGB float64) (bool, float64) { + paramsB := ParseParamSize(parameterSize) + if paramsB == 0 { + return true, 0 + } + needed := EstimateRAMGB(paramsB) + return needed <= systemRAMGB*0.75, needed +} + diff --git a/internal/ollama/hardware_darwin.go b/internal/ollama/hardware_darwin.go index ef1462b..a52dee7 100644 --- a/internal/ollama/hardware_darwin.go +++ b/internal/ollama/hardware_darwin.go @@ -18,3 +18,14 @@ func GetSystemRAMGB() float64 { } return float64(bytes) / (1024 * 1024 * 1024) } + +// DetectAppleSiliconBandwidthGBs returns the memory bandwidth in GB/s for the +// current Apple Silicon chip, or 0 if unrecognised. +// Reads "machdep.cpu.brand_string" via sysctl. 
+func DetectAppleSiliconBandwidthGBs() float64 { + out, err := exec.Command("sysctl", "-n", "machdep.cpu.brand_string").Output() + if err != nil { + return 0 + } + return ParseAppleSiliconBandwidth(strings.TrimSpace(string(out))) +} diff --git a/internal/ollama/hardware_linux.go b/internal/ollama/hardware_linux.go index 2345c4e..83ee26d 100644 --- a/internal/ollama/hardware_linux.go +++ b/internal/ollama/hardware_linux.go @@ -10,3 +10,6 @@ func GetSystemRAMGB() float64 { } return float64(info.Totalram) * float64(info.Unit) / (1024 * 1024 * 1024) } + +// DetectAppleSiliconBandwidthGBs returns 0 on Linux (not Apple Silicon). +func DetectAppleSiliconBandwidthGBs() float64 { return 0 } diff --git a/internal/ollama/hardware_test.go b/internal/ollama/hardware_test.go index 40cc0c5..2655a02 100644 --- a/internal/ollama/hardware_test.go +++ b/internal/ollama/hardware_test.go @@ -105,3 +105,29 @@ func TestGetSystemRAMGB(t *testing.T) { t.Errorf("expected positive system RAM, got %.1f", ram) } } + +func TestCheckHardwareSafe_FitsWithin75Pct(t *testing.T) { + // 8B model needs ~6.4 GB, 75% of 16 GB = 12 GB → fits + ok, needed := ollama.CheckHardwareSafe("8B", 16.0) + if !ok { + t.Errorf("expected 8B to fit safely in 16GB (needed=%.1f)", needed) + } +} + +func TestCheckHardwareSafe_DoesNotFitAt75Pct(t *testing.T) { + // 70B model needs ~40.5 GB, 75% of 48 GB = 36 GB → does NOT fit safely + ok, needed := ollama.CheckHardwareSafe("70B", 48.0) + if ok { + t.Errorf("expected 70B to not fit safely in 48GB (needed=%.1f)", needed) + } +} + +func TestCheckHardwareSafe_UnknownSize(t *testing.T) { + ok, needed := ollama.CheckHardwareSafe("", 16.0) + if !ok { + t.Error("expected unknown size to return ok") + } + if needed != 0 { + t.Errorf("expected needed=0 for unknown, got %v", needed) + } +} diff --git a/internal/ollama/ollama.go b/internal/ollama/ollama.go index 86c6503..17dd576 100644 --- a/internal/ollama/ollama.go +++ b/internal/ollama/ollama.go @@ -3,6 +3,7 @@ package ollama 
import ( "context" "fmt" + "math" "net/http" "net/url" "sort" @@ -14,18 +15,22 @@ import ( // Model represents a locally available ollama model. type Model struct { - Name string - Size int64 - ParameterSize string - Family string + Name string + Size int64 + ParameterSize string + Family string + Provider string // "" or "ollama" = local; otherwise cloud provider (e.g. "anthropic") + CompletionPricePerToken float64 // USD per token; 0 for local models } // RankedModel is a Model with its SWE-bench score and rank position. type RankedModel struct { Model - SWEScore float64 - Rank int - ScoreSource string // "SWE", "est", or "" (no score) + SWEScore float64 + Rank int + ScoreSource string // "SWE", "est", or "" (no score) + SpeedTPM float64 // estimated tokens per minute + RecommendedScore float64 // composite score for default ranking } // DefaultBaseURL is the default ollama API endpoint. @@ -74,31 +79,56 @@ func FetchModels(ctx context.Context, baseURL string) ([]Model, error) { return models, nil } -// RankModels sorts models by SWE-bench score (descending) and limits to top N. +// RankModels sorts models by composite score (descending) and limits to top N. // It tries multiple matching strategies in order: // 1. Direct family match against sweScores (MatchModelScore) // 2. HF repo ID match against sweScores (FindSWEScore via hfInfoMap) -// 3. Hardcoded fallback ratings +// 3. Cloud model NormalizeCloudID match against sweScores +// 4. Hardcoded fallback ratings // // ScoreSource is set to "SWE" for live matches, "est" for fallback, "" for no score. -func RankModels(models []Model, limit int, sweScores map[string]float64, hfInfoMap map[string]HFModelInfo) []RankedModel { +// sortBy can be "swe", "speed", "size", or "" / "recommended". 
+func RankModels(models []Model, limit int, sweScores map[string]float64, hfInfoMap map[string]HFModelInfo, systemRAMGB float64, sortBy string) []RankedModel { if len(models) == 0 { return nil } + bandwidthGBs := DetectAppleSiliconBandwidthGBs() + ranked := make([]RankedModel, len(models)) for i, m := range models { score, source := resolveScore(m, sweScores, hfInfoMap) + speed := resolveSpeed(m, bandwidthGBs) + ramFit := ramFitMultiplier(m, systemRAMGB) ranked[i] = RankedModel{ - Model: m, - SWEScore: score, - ScoreSource: source, + Model: m, + SWEScore: score, + ScoreSource: source, + SpeedTPM: speed, + RecommendedScore: ComputeRecommendedScore(score, speed, ramFit), } } sort.Slice(ranked, func(i, j int) bool { - if ranked[i].SWEScore != ranked[j].SWEScore { - return ranked[i].SWEScore > ranked[j].SWEScore + switch sortBy { + case "swe": + if ranked[i].SWEScore != ranked[j].SWEScore { + return ranked[i].SWEScore > ranked[j].SWEScore + } + case "speed": + if ranked[i].SpeedTPM != ranked[j].SpeedTPM { + return ranked[i].SpeedTPM > ranked[j].SpeedTPM + } + case "size": + si := ParseParamSize(ranked[i].ParameterSize) + sj := ParseParamSize(ranked[j].ParameterSize) + if si != sj { + return si > sj + } + default: // "recommended" or "" + if ranked[i].RecommendedScore != ranked[j].RecommendedScore { + return ranked[i].RecommendedScore > ranked[j].RecommendedScore + } } return ranked[i].Name < ranked[j].Name }) @@ -134,6 +164,21 @@ func resolveScore(m Model, sweScores map[string]float64, hfInfoMap map[string]HF } } + // Strategy 2b: for cloud models, try live SWE-bench scores first (exact + // normalized-ID match), then fall back to hardcoded ratings. + // Live scores always take priority over estimates. 
+ if m.Provider != "" && m.Provider != "ollama" { + normalized := NormalizeCloudID(m.Name) + if sweScores != nil { + if s, ok := sweScores[normalized]; ok { + return s, "SWE" + } + } + if s, ok := lookupCloudRating(normalized); ok { + return s, "est" + } + } + // Strategy 3: hardcoded fallback (exact name match). if s := sweBenchRatings[m.Name]; s > 0 { return s, "est" @@ -148,6 +193,65 @@ func resolveScore(m Model, sweScores map[string]float64, hfInfoMap map[string]HF return 0, "" } +// ComputeRecommendedScore computes the composite ranking score. +// swe: SWE-bench %, speedTPM: tokens/min, ramFit: graduated multiplier (0–1). +// +// SWE quality dominates (90% weight). Speed contributes a small bonus (up to 5 +// points) so that among models with equal SWE scores, faster wins — but a model +// with 8% lower SWE cannot overcome a 3-point speed bonus. +func ComputeRecommendedScore(swe, speedTPM, ramFit float64) float64 { + speedBonus := math.Min(speedTPM/6000, 5) // max +5 pts at ≥30K T/m + return (swe*0.90 + speedBonus) * ramFit +} + +// ramFitMultiplier returns a graduated score based on how much of available RAM the model uses. +// Cloud models always return 1.0 (they need no local RAM). +// For local models, the multiplier decreases as RAM usage increases: +// +// ≤ 60% of RAM → 1.0 (green: comfortable, leaves room for OS + KV cache) +// ≤ 75% of RAM → 0.9 (yellow: safe, within conservative threshold) +// ≤ 90% of RAM → 0.5 (orange: tight, memory pressure likely) +// ≤ 100% of RAM → 0.1 (red: technically fits but risky) +// > 100% of RAM → 0.0 (won't fit) +// +// The 60% sweet spot follows the consensus that ~40% of RAM should remain free +// for the OS, KV cache growth, and other processes during inference. 
+func ramFitMultiplier(m Model, systemRAMGB float64) float64 { + if m.Provider != "" && m.Provider != "ollama" { + return 1.0 // cloud models need 0 RAM + } + if systemRAMGB <= 0 { + return 1.0 // no RAM info + } + paramsB := ParseParamSize(m.ParameterSize) + if paramsB == 0 { + return 1.0 // unknown size — can't penalise + } + needed := EstimateRAMGB(paramsB) + ratio := needed / systemRAMGB + switch { + case ratio <= 0.60: + return 1.0 + case ratio <= 0.75: + return 0.9 + case ratio <= 0.90: + return 0.5 + case ratio <= 1.00: + return 0.1 + default: + return 0.0 + } +} + +// resolveSpeed estimates tokens/min for a model. +// bandwidthGBs is the detected Apple Silicon memory bandwidth (0 on other platforms). +func resolveSpeed(m Model, bandwidthGBs float64) float64 { + if m.Provider != "" && m.Provider != "ollama" { + return EstimateCloudSpeedTPM(m.CompletionPricePerToken) + } + return EstimateLocalSpeedTPM(ParseParamSize(m.ParameterSize), bandwidthGBs) +} + // FormatTOMLSnippet generates a commented-out TOML snippet for devcell.toml // from ranked models. 
func FormatTOMLSnippet(ranked []RankedModel) string { diff --git a/internal/ollama/ollama_test.go b/internal/ollama/ollama_test.go index 9193776..be0c652 100644 --- a/internal/ollama/ollama_test.go +++ b/internal/ollama/ollama_test.go @@ -21,7 +21,7 @@ func TestRankModels_SortsBySWEScore(t *testing.T) { {Name: "deepseek-r1:32b", ParameterSize: "32B"}, } - ranked := ollama.RankModels(models, 10, nil, nil) + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") if len(ranked) != 3 { t.Fatalf("expected 3 models, got %d", len(ranked)) @@ -43,7 +43,7 @@ func TestRankModels_LimitsToTopN(t *testing.T) { {Name: "qwen3:8b"}, } - ranked := ollama.RankModels(models, 2, nil, nil) + ranked := ollama.RankModels(models, 2, nil, nil, 0, "") if len(ranked) != 2 { t.Fatalf("expected 2 models, got %d", len(ranked)) @@ -56,7 +56,7 @@ func TestRankModels_UnknownModelsGetZeroScore(t *testing.T) { {Name: "deepseek-r1:32b"}, } - ranked := ollama.RankModels(models, 10, nil, nil) + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") if len(ranked) != 2 { t.Fatalf("expected 2 models, got %d", len(ranked)) @@ -72,7 +72,7 @@ func TestRankModels_UnknownModelsGetZeroScore(t *testing.T) { } func TestRankModels_Empty(t *testing.T) { - ranked := ollama.RankModels(nil, 10, nil, nil) + ranked := ollama.RankModels(nil, 10, nil, nil, 0, "") if len(ranked) != 0 { t.Errorf("expected empty result, got %d", len(ranked)) } @@ -85,7 +85,7 @@ func TestRankModels_RankNumbersAreSequential(t *testing.T) { {Name: "deepseek-r1:32b"}, } - ranked := ollama.RankModels(models, 10, nil, nil) + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") for i, r := range ranked { if r.Rank != i+1 { @@ -106,7 +106,7 @@ func TestRankModels_UsesLiveSWEScores(t *testing.T) { "qwen3": 28.0, } - ranked := ollama.RankModels(models, 10, liveScores, nil) + ranked := ollama.RankModels(models, 10, liveScores, nil, 0, "") if ranked[0].Name != "deepseek-r1:32b" || ranked[0].SWEScore != 49.2 { t.Errorf("expected deepseek-r1:32b 
with 49.2, got %s with %.1f", ranked[0].Name, ranked[0].SWEScore) @@ -130,7 +130,7 @@ func TestRankModels_LiveScoresOverrideFallback(t *testing.T) { "deepseek-r1": 99.9, } - ranked := ollama.RankModels(models, 10, liveScores, nil) + ranked := ollama.RankModels(models, 10, liveScores, nil, 0, "") if ranked[0].SWEScore != 99.9 { t.Errorf("expected live score 99.9 to override fallback, got %.1f", ranked[0].SWEScore) @@ -145,7 +145,7 @@ func TestRankModels_ScoreSourceSWE(t *testing.T) { "deepseek-r1": 49.2, } - ranked := ollama.RankModels(models, 10, liveScores, nil) + ranked := ollama.RankModels(models, 10, liveScores, nil, 0, "") if ranked[0].ScoreSource != "SWE" { t.Errorf("expected ScoreSource=SWE, got %q", ranked[0].ScoreSource) @@ -157,7 +157,7 @@ func TestRankModels_ScoreSourceEst(t *testing.T) { {Name: "deepseek-r1:32b"}, } - ranked := ollama.RankModels(models, 10, nil, nil) + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") if ranked[0].ScoreSource != "est" { t.Errorf("expected ScoreSource=est, got %q", ranked[0].ScoreSource) @@ -169,7 +169,7 @@ func TestRankModels_ScoreSourceEmpty_WhenNoScore(t *testing.T) { {Name: "totally-unknown:latest"}, } - ranked := ollama.RankModels(models, 10, nil, nil) + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") if ranked[0].ScoreSource != "" { t.Errorf("expected empty ScoreSource for unknown model, got %q", ranked[0].ScoreSource) @@ -191,7 +191,7 @@ func TestRankModels_UsesHFRepoIDForSWEMatch(t *testing.T) { "qwen2.5-coder": {ModelID: "Qwen/Qwen2.5-Coder-32B-Instruct"}, } - ranked := ollama.RankModels(models, 10, sweScores, hfInfoMap) + ranked := ollama.RankModels(models, 10, sweScores, hfInfoMap, 0, "") if ranked[0].SWEScore != 35.0 { t.Errorf("expected SWE score 35.0 via HF repo ID, got %.1f", ranked[0].SWEScore) @@ -341,6 +341,58 @@ func TestFormatActiveTOMLSnippet_Empty(t *testing.T) { } } +func TestComputeRecommendedScore_Basic(t *testing.T) { + // swe=50, speedTPM=9000, ramFit=1.0 + // speedBonus = 
min(9000/6000, 5) = min(1.5, 5) = 1.5 + // score = (50*0.90 + 1.5) * 1.0 = 46.5 + got := ollama.ComputeRecommendedScore(50, 9000, 1.0) + if got != 46.5 { + t.Errorf("ComputeRecommendedScore(50, 9000, 1.0) = %v, want 46.5", got) + } +} + +func TestComputeRecommendedScore_RAMPenalty(t *testing.T) { + without := ollama.ComputeRecommendedScore(50, 9000, 1.0) + with := ollama.ComputeRecommendedScore(50, 9000, 0.1) + if with >= without/2 { + t.Errorf("expected heavy de-rank with ramFit=0.1: %v vs %v", with, without) + } +} + +func TestRankModels_NewFields(t *testing.T) { + models := []ollama.Model{ + {Name: "deepseek-r1:32b", ParameterSize: "32B"}, + } + ranked := ollama.RankModels(models, 10, nil, nil, 0, "") + if ranked[0].SpeedTPM <= 0 { + t.Errorf("expected SpeedTPM > 0, got %v", ranked[0].SpeedTPM) + } + if ranked[0].RecommendedScore <= 0 { + t.Errorf("expected RecommendedScore > 0 for a model with SWE score, got %v", ranked[0].RecommendedScore) + } +} + +func TestRankModels_SortBySWE(t *testing.T) { + models := []ollama.Model{ + {Name: "qwen3:8b", ParameterSize: "8B"}, + {Name: "deepseek-r1:70b", ParameterSize: "70B"}, + } + ranked := ollama.RankModels(models, 10, nil, nil, 0, "swe") + if ranked[0].Name != "deepseek-r1:70b" { + t.Errorf("expected deepseek-r1:70b first with sortBy=swe, got %s", ranked[0].Name) + } +} + +func TestRankModels_CloudProviderNoRAMPenalty(t *testing.T) { + // Cloud model with no param size: should get ramFit=1.0, not penalized + // Local 70B model on 8GB RAM: should get ramFit=0.1 + localScore := ollama.ComputeRecommendedScore(30, 720, 0.1) // 70B on 8GB + cloudScore := ollama.ComputeRecommendedScore(30, 5400, 1.0) // cloud model same SWE score + if localScore >= cloudScore { + t.Errorf("expected cloud model to score higher than RAM-penalized local: cloud=%v local=%v", cloudScore, localScore) + } +} + func contains(s, sub string) bool { return len(s) > 0 && len(sub) > 0 && indexOf(s, sub) >= 0 } diff --git a/internal/ollama/ratings.go 
b/internal/ollama/ratings.go index ff7abd9..068c7a5 100644 --- a/internal/ollama/ratings.go +++ b/internal/ollama/ratings.go @@ -1,5 +1,7 @@ package ollama +import "strings" + // sweBenchRatings maps ollama model names to estimated coding capability scores. // // These are NOT direct SWE-bench Verified scores for the quantized variants. @@ -103,3 +105,123 @@ var sweBenchRatings = map[string]float64{ "llama4:maverick": 12.0, "llama4:scout": 6.0, } + +// cloudModelRatings maps normalized cloud model IDs (output of NormalizeCloudID) +// to SWE-bench Verified percentage scores. +// +// Keys are matched with prefix logic so "claude-haiku-4-5-20251001" matches +// "claude-haiku-4-5". Use the shortest unambiguous prefix as the key. +// +// Sources: +// - SWE-bench Verified leaderboard: https://www.swebench.com/ +// - Anthropic model cards and benchmark announcements +// - OpenAI o-series and GPT benchmark disclosures +// - Google Gemini technical reports +var cloudModelRatings = map[string]float64{ + // Anthropic — SWE-bench Verified scores + "claude-opus-4-5": 76.8, // Claude Opus 4.5 — confirmed leaderboard entry + "claude-opus-4-6": 76.8, // Claude Opus 4.6 — same family, conservative estimate + "claude-opus-4": 72.0, // Claude Opus 4 + "claude-sonnet-4-5": 72.7, // Claude Sonnet 4.5 + "claude-sonnet-4": 49.0, // Claude Sonnet 4 + "claude-haiku-4-5": 43.0, // Claude Haiku 4.5 + "claude-3-7-sonnet": 62.3, // Claude 3.7 Sonnet + "claude-3-5-sonnet": 49.0, // Claude 3.5 Sonnet (2024-10) + "claude-3-5-haiku": 40.6, // Claude 3.5 Haiku + "claude-3-opus": 11.1, // Claude 3 Opus (older baseline) + + // OpenAI — SWE-bench Verified scores + "o3": 71.7, // o3 (high-compute) + "o4-mini": 68.1, // o4-mini + "o3-mini": 49.3, // o3-mini + "gpt-4-1": 54.6, // GPT-4.1 + "o1": 48.9, // o1 + "gpt-4o": 33.2, // GPT-4o + "gpt-4-1-mini": 34.6, // GPT-4.1 mini + "o1-mini": 16.7, // o1-mini + "gpt-4o-mini": 23.7, // GPT-4o mini + + // Google — SWE-bench Verified scores + "gemini-2-5-pro": 
 63.8, // Gemini 2.5 Pro + "gemini-2-0-flash": 41.3, // Gemini 2.0 Flash + "gemini-1-5-pro": 26.7, // Gemini 1.5 Pro + "gemini-1-5-flash": 18.6, // Gemini 1.5 Flash +} + +// lookupCloudRating finds a score for a normalized cloud model ID. +// Tries exact match first, then prefix match (longest wins). +func lookupCloudRating(normalized string) (float64, bool) { + if s, ok := cloudModelRatings[normalized]; ok { + return s, true + } + var bestScore float64 + var bestLen int + for key, score := range cloudModelRatings { + if strings.HasPrefix(normalized, key) && len(key) > bestLen { + bestScore = score + bestLen = len(key) + } + } + if bestLen > 0 { + return bestScore, true + } + return 0, false +} + +// EstimateCloudSpeedTPM estimates cloud model speed (tokens/min) from +// completion price per token (USD). Cheaper models are typically faster. +func EstimateCloudSpeedTPM(pricePerToken float64) float64 { + switch { + case pricePerToken < 0.000001: // < $1/1M tokens + return 18000 + case pricePerToken < 0.000005: // < $5/1M + return 9000 + case pricePerToken < 0.000015: // < $15/1M + return 5400 + case pricePerToken < 0.000050: // < $50/1M + return 2400 + default: + return 1200 + } +} + +// EstimateLocalSpeedTPM estimates local model speed (tokens/min). +// +// When bandwidthGBs > 0 (Apple Silicon detected), uses the physics formula: +// +// speed = bandwidth / model_weight_bytes_per_token +// +// Q4_K_M quantization ≈ 0.5625 bytes/param (4.5 bits/param). +// Capped at 12000 T/min (200 tok/s) — the compute-bound ceiling for tiny models. +// +// When bandwidthGBs == 0, falls back to generic tier estimates for +// average consumer GPU hardware. 
+func EstimateLocalSpeedTPM(paramsB, bandwidthGBs float64) float64 { + if bandwidthGBs > 0 && paramsB > 0 { + const ( + q4BytesPerParam = 0.5625 // Q4_K_M ≈ 4.5 bits/param = 0.5625 bytes/param + bandwidthEff = 0.78 // llama.cpp Metal achieves ~75-80% of theoretical bandwidth + maxTokPerSec = 200 // compute-bound ceiling for sub-3B models on Apple Silicon + ) + tokPerSec := (bandwidthGBs * bandwidthEff) / (paramsB * q4BytesPerParam) + if tokPerSec > maxTokPerSec { + tokPerSec = maxTokPerSec + } + return tokPerSec * 60 + } + // Generic tier fallback (average consumer GPU). + switch { + case paramsB <= 3: + return 9000 + case paramsB <= 8: + return 4800 + case paramsB <= 14: + return 2700 + case paramsB <= 32: + return 1500 + case paramsB <= 70: + return 720 + default: + return 180 + } +} diff --git a/internal/ollama/ratings_test.go b/internal/ollama/ratings_test.go new file mode 100644 index 0000000..cc008a1 --- /dev/null +++ b/internal/ollama/ratings_test.go @@ -0,0 +1,100 @@ +package ollama_test + +import ( + "testing" + + "github.com/DimmKirr/devcell/internal/ollama" +) + +func TestEstimateCloudSpeedTPM(t *testing.T) { + tests := []struct { + pricePerToken float64 + expected float64 + }{ + {0.0000005, 18000}, // < $1/1M → very fast + {0.000001, 9000}, // boundary: exactly $1/1M, falls to next bucket + {0.0000009, 18000}, // just under $1/1M + {0.000002, 9000}, // $1-5/1M + {0.000008, 5400}, // $5-15/1M + {0.000030, 2400}, // $15-50/1M + {0.000100, 1200}, // > $50/1M → premium/slow + } + for _, tt := range tests { + got := ollama.EstimateCloudSpeedTPM(tt.pricePerToken) + if got != tt.expected { + t.Errorf("EstimateCloudSpeedTPM(%v) = %v, want %v", tt.pricePerToken, got, tt.expected) + } + } +} + +func TestEstimateLocalSpeedTPM_GenericTiers(t *testing.T) { + // bandwidthGBs=0 → generic tier fallback (average consumer GPU). 
+ tests := []struct { + paramsB float64 + expected float64 + }{ + {1.5, 9000}, // ≤3B + {3.0, 9000}, // ≤3B boundary + {7.0, 4800}, // ≤8B + {8.0, 4800}, // ≤8B boundary + {14.0, 2700}, // ≤14B boundary + {32.0, 1500}, // ≤32B + {70.0, 720}, // ≤70B + {671.0, 180}, // >70B (MoE) + {0.0, 9000}, // unknown → fast estimate + {-1.0, 9000}, // negative input → fast estimate + } + for _, tt := range tests { + got := ollama.EstimateLocalSpeedTPM(tt.paramsB, 0) + if got != tt.expected { + t.Errorf("EstimateLocalSpeedTPM(%v, 0) = %v, want %v", tt.paramsB, got, tt.expected) + } + } +} + +func TestEstimateLocalSpeedTPM_AppleSilicon(t *testing.T) { + // M4 Pro: 273 GB/s × 0.78 efficiency. Formula: (bw*0.78) / (params*0.5625) * 60. + const bw = 273.0 + tests := []struct { + paramsB float64 + wantApprox float64 // expected T/min + }{ + {0.6, 12000}, // sub-1B: compute-bound → capped at 200 tok/s = 12000 T/m + {8, 2839}, // (273*0.78)/(8*0.5625)*60 ≈ 2839 + {32, 710}, // (273*0.78)/(32*0.5625)*60 ≈ 710 + {70, 325}, // (273*0.78)/(70*0.5625)*60 ≈ 325 + } + for _, tt := range tests { + got := ollama.EstimateLocalSpeedTPM(tt.paramsB, bw) + // Allow ±5% tolerance for floating-point rounding. 
+ delta := tt.wantApprox * 0.05 + if got < tt.wantApprox-delta || got > tt.wantApprox+delta { + t.Errorf("EstimateLocalSpeedTPM(%v, %v) = %.0f, want ~%.0f (±5%%)", + tt.paramsB, bw, got, tt.wantApprox) + } + } +} + +func TestParseAppleSiliconBandwidth(t *testing.T) { + tests := []struct { + brand string + want float64 + }{ + {"Apple M4 Pro", 273}, + {"Apple M4 Max", 546}, + {"Apple M4", 120}, + {"Apple M3 Max", 400}, + {"Apple M3 Pro", 150}, + {"Apple M2 Max", 400}, + {"Apple M1 Ultra", 800}, + {"Intel Core i9-13900H", 0}, + {"GenuineIntel", 0}, + {"Apple M99 Pro", 0}, // unknown generation + } + for _, tt := range tests { + got := ollama.ParseAppleSiliconBandwidth(tt.brand) + if got != tt.want { + t.Errorf("ParseAppleSiliconBandwidth(%q) = %v, want %v", tt.brand, got, tt.want) + } + } +} diff --git a/internal/ollama/swebench.go b/internal/ollama/swebench.go index 313971c..e558c31 100644 --- a/internal/ollama/swebench.go +++ b/internal/ollama/swebench.go @@ -7,6 +7,8 @@ import ( "net/http" "strings" "time" + + "github.com/DimmKirr/devcell/internal/cache" ) // SWEBenchURL is the default URL for SWE-bench leaderboard data. @@ -35,7 +37,15 @@ type lbEntry struct { // // All entries are included (not filtered by os_model) so that HF repo ID // matching can find scores for any model. +// SWEBenchCacheTTL is the on-disk cache lifetime for SWE-bench leaderboard data. 
+const SWEBenchCacheTTL = 6 * time.Hour + func FetchSWEBenchScores(ctx context.Context, url string) (map[string]float64, error) { + const cacheKey = "swebench-scores.json" + if scores, ok := cache.Load[map[string]float64](cacheKey, SWEBenchCacheTTL); ok { + return scores, nil + } + ctx, cancel := context.WithTimeout(ctx, 15*time.Second) defer cancel() @@ -99,6 +109,7 @@ func FetchSWEBenchScores(ctx context.Context, url string) (map[string]float64, e } } + cache.Save(cacheKey, scores) return scores, nil } @@ -190,6 +201,23 @@ func modelFamily(name string) string { return name } +// NormalizeCloudID converts an OpenRouter model ID to a SWE-bench-comparable key. +// Strips provider prefix, converts dots to dashes, strips :variant suffixes. +// "anthropic/claude-opus-4.6" → "claude-opus-4-6" +func NormalizeCloudID(id string) string { + // Strip provider prefix (OpenRouter IDs have exactly one '/'). + // e.g. "anthropic/claude-opus-4.6" → "claude-opus-4.6" + if idx := strings.LastIndex(id, "/"); idx >= 0 { + id = id[idx+1:] + } + // Strip :variant: "model:preview" → "model" + if idx := strings.Index(id, ":"); idx >= 0 { + id = id[:idx] + } + // Dots to dashes for SWE-bench key compatibility. + return strings.ReplaceAll(id, ".", "-") +} + // matchFallbackScore finds a fallback rating for a model by checking if any // rated model name is a prefix of the given name, or if they share the same // family. 
Handles variants like "qwen3-coder:30b-128k" matching diff --git a/internal/ollama/swebench_test.go b/internal/ollama/swebench_test.go index 730cfdb..e18c7ca 100644 --- a/internal/ollama/swebench_test.go +++ b/internal/ollama/swebench_test.go @@ -55,6 +55,7 @@ const testLeaderboardJSON = `{ func serveSWEBench(t *testing.T, payload string) *httptest.Server { t.Helper() + t.Setenv("XDG_CACHE_HOME", t.TempDir()) // isolate on-disk cache per test srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Write([]byte(payload)) })) @@ -145,6 +146,7 @@ func TestFetchSWEBenchScores_SkipsEntriesWithoutModelTag(t *testing.T) { } func TestFetchSWEBenchScores_ReturnsErrorOnBadJSON(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) srv := serveSWEBench(t, "not json") _, err := ollama.FetchSWEBenchScores(context.Background(), srv.URL) @@ -154,6 +156,7 @@ func TestFetchSWEBenchScores_ReturnsErrorOnBadJSON(t *testing.T) { } func TestFetchSWEBenchScores_ReturnsErrorOnHTTPFailure(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) _, err := ollama.FetchSWEBenchScores(context.Background(), "http://127.0.0.1:0") if err == nil { t.Error("expected error for unreachable server") @@ -161,6 +164,7 @@ func TestFetchSWEBenchScores_ReturnsErrorOnHTTPFailure(t *testing.T) { } func TestFetchSWEBenchScores_ReturnsErrorWhenNoVerifiedLeaderboard(t *testing.T) { + t.Setenv("XDG_CACHE_HOME", t.TempDir()) data := `{"leaderboards": [{"name": "Other", "results": []}]}` srv := serveSWEBench(t, data) @@ -287,3 +291,24 @@ func TestMatchModelScore_NilScores(t *testing.T) { t.Error("expected no match with nil scores") } } + +func TestNormalizeCloudID(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"anthropic/claude-opus-4.6", "claude-opus-4-6"}, + {"anthropic/claude-sonnet-4.5", "claude-sonnet-4-5"}, + {"google/gemini-2.5-pro", "gemini-2-5-pro"}, + {"openai/gpt-4.5", "gpt-4-5"}, + {"deepseek/deepseek-r1", "deepseek-r1"}, + 
{"claude-opus-4.6", "claude-opus-4-6"}, + {"openai/gpt-4o:preview", "gpt-4o"}, + } + for _, tt := range tests { + got := ollama.NormalizeCloudID(tt.input) + if got != tt.expected { + t.Errorf("NormalizeCloudID(%q) = %q, want %q", tt.input, got, tt.expected) + } + } +} diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 26c7723..f0687fd 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -101,7 +101,12 @@ func BuildArgv(spec RunSpec, fs FS, lookPath func(string) (string, error)) []str argv = append(argv, "op", "run", "--") } - argv = append(argv, "docker", "run", "--rm", "-it", "--shm-size=1g") + dockerRunFlags := []string{"--rm", "-it", "--shm-size=1g"} + if spec.CellCfg.Cell.DockerPrivileged { + dockerRunFlags = append(dockerRunFlags, "--privileged") + } + argv = append(argv, "docker", "run") + argv = append(argv, dockerRunFlags...) // Identity argv = append(argv, "--name", c.ContainerName) @@ -322,10 +327,10 @@ func EnsureNetwork(ctx context.Context) error { // --pull is always passed so Docker checks for a newer base image digest and // busts the layer cache when the upstream image has been updated. func BuildImage(ctx context.Context, configDir string, noCache bool, verbose bool, out io.Writer) error { - progress := "--progress=quiet" - if verbose { - progress = "--progress=plain" - } + // Always use plain progress so the full build log (including nix errors) + // is captured. In non-verbose mode the output goes to a buffer and is + // only displayed on failure. + progress := "--progress=plain" args := []string{"build", "-t", UserImageTag(), progress, "--build-arg", "GIT_COMMIT=" + version.GitCommit, } @@ -345,16 +350,8 @@ func BuildImage(ctx context.Context, configDir string, noCache bool, verbose boo } return nil } - if verbose { - cmd.Stdout = out - cmd.Stderr = out - } else { - // Suppress progress output; capture stderr so we can replay it on failure. 
- cmd.Stdout = io.Discard - cmd.Stderr = out - // Belt-and-suspenders: also tell BuildKit via env to use quiet mode. - cmd.Env = append(os.Environ(), "BUILDKIT_PROGRESS=quiet") - } + cmd.Stdout = out + cmd.Stderr = out if err := cmd.Run(); err != nil { if ctx.Err() != nil { return fmt.Errorf("docker build: interrupted") diff --git a/internal/runner/runner_test.go b/internal/runner/runner_test.go index c521f4d..09c126e 100644 --- a/internal/runner/runner_test.go +++ b/internal/runner/runner_test.go @@ -624,15 +624,15 @@ func TestParseImageMetadata_InvalidJSON(t *testing.T) { func TestStackImageTag_GoStack(t *testing.T) { got := runner.StackImageTag("go") // version.Version is v0.0.0 in tests → v0.0.0-go - if got != "ghcr.io/dimmkirr/devcell:v0.0.0-go" { - t.Errorf("want ghcr.io/dimmkirr/devcell:v0.0.0-go, got %q", got) + if got != "public.ecr.aws/w1l3v2k8/devcell:v0.0.0-go" { + t.Errorf("want public.ecr.aws/w1l3v2k8/devcell:v0.0.0-go, got %q", got) } } func TestStackImageTag_UltimateStack(t *testing.T) { got := runner.StackImageTag("ultimate") - if got != "ghcr.io/dimmkirr/devcell:v0.0.0-ultimate" { - t.Errorf("want ghcr.io/dimmkirr/devcell:v0.0.0-ultimate, got %q", got) + if got != "public.ecr.aws/w1l3v2k8/devcell:v0.0.0-ultimate" { + t.Errorf("want public.ecr.aws/w1l3v2k8/devcell:v0.0.0-ultimate, got %q", got) } } @@ -684,7 +684,7 @@ func TestArgv_AwsReadOnlyFalse(t *testing.T) { func TestBaseImageTag_DefaultIsVersioned(t *testing.T) { t.Setenv("DEVCELL_BASE_IMAGE", "") got := runner.BaseImageTag() - if got != "ghcr.io/dimmkirr/devcell:v0.0.0-core" { - t.Errorf("want ghcr.io/dimmkirr/devcell:v0.0.0-core, got %q", got) + if got != "public.ecr.aws/w1l3v2k8/devcell:v0.0.0-core" { + t.Errorf("want public.ecr.aws/w1l3v2k8/devcell:v0.0.0-core, got %q", got) } } diff --git a/internal/runner/vagrant.go b/internal/runner/vagrant.go new file mode 100644 index 0000000..a0e9a94 --- /dev/null +++ b/internal/runner/vagrant.go @@ -0,0 +1,430 @@ +package runner + +import ( + 
"bytes" + "context" + "encoding/csv" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/DimmKirr/devcell/internal/cfg" + "github.com/DimmKirr/devcell/internal/config" + "github.com/DimmKirr/devcell/internal/ux" +) + +// VagrantSpec holds everything needed to build a vagrant ssh argv. +type VagrantSpec struct { + Config config.Config + CellCfg cfg.CellConfig + Binary string // agent binary to run inside the VM (e.g. "claude") + DefaultFlags []string // flags always passed to the binary + UserArgs []string // additional args from the user + VagrantDir string // directory containing the Vagrantfile + Provider string // vagrant provider ("utm" or "libvirt") + EnvVars []string // KEY=VALUE pairs to set inside the VM via `env` + ProjectDir string // host project directory — basename is used as workdir in VM +} + +// BuildVagrantSSHArgv constructs the remote-command argv for: +// +// vagrant ssh -- -t bash -l -c "cd ~/project && [env KEY=VAL ...] " +// +// The remote command is wrapped in `bash -l -c "..."` so that the login shell +// sources ~/.profile and ~/.nix-profile/etc/profile.d/nix.sh, putting +// home-manager-installed binaries (claude, codex, etc.) on PATH. +// +// When ProjectDir is set, the command cds into ~/basename(ProjectDir) first, +// mirroring Docker's --workdir behaviour. The post-up rsync trigger syncs the +// project there, so the agent sees the correct working directory. +// +// The caller is responsible for running the command with its working directory +// set to VagrantDir (via cmd.Dir) so vagrant finds the correct Vagrantfile. +// It is a pure function: no I/O, no exec. +func BuildVagrantSSHArgv(spec VagrantSpec) []string { + // Build the inner command tokens: [env KEY=VAL...] binary flags... args... + var tokens []string + if len(spec.EnvVars) > 0 { + tokens = append(tokens, "env") + tokens = append(tokens, spec.EnvVars...) + } + tokens = append(tokens, spec.Binary) + tokens = append(tokens, spec.DefaultFlags...) 
+ tokens = append(tokens, spec.UserArgs...) + + // Shell-quote each token and join into a single string for bash -c. + agentCmd := shellJoinTokens(tokens) + + // Prepend cd into the project workdir when ProjectDir is known. + // The post-up rsync trigger syncs ProjectDir to ~/basename(ProjectDir). + var remoteCmd string + if spec.ProjectDir != "" { + basename := filepath.Base(spec.ProjectDir) + remoteCmd = "cd ~/" + shellQuoteToken(basename) + " && " + agentCmd + } else { + remoteCmd = agentCmd + } + + // Explicitly source the nix profile before running the agent binary. + // The utm/bookworm box ships a .bash_profile that doesn't source .profile, + // so the nix installer's PATH additions (written to .profile) are never loaded + // by `bash -l`. Sourcing nix.sh directly guarantees home-manager-installed + // binaries are on PATH regardless of the box's shell init files. + const nixSource = `. "$HOME/.nix-profile/etc/profile.d/nix.sh" 2>/dev/null || true` + remoteCmd = nixSource + "; " + remoteCmd + + // Use a login bash shell so nix profile is sourced before running the binary. + // Shell-quote remoteCmd so the outer sshd shell passes it as a single token to + // bash's -c. Without quoting, SSH joins ["bash","-l","-c","script"] with spaces + // and the remote shell splits "script" at word boundaries, breaking the command. + return []string{"vagrant", "ssh", "--", "-t", "bash", "-l", "-c", shellQuoteToken(remoteCmd)} +} + +// shellJoinTokens shell-quotes each token and joins them with spaces, +// producing a string safe to pass as the argument to `bash -c`. +func shellJoinTokens(tokens []string) string { + quoted := make([]string, len(tokens)) + for i, t := range tokens { + quoted[i] = shellQuoteToken(t) + } + return strings.Join(quoted, " ") +} + +// shellQuoteToken wraps a token in single quotes, escaping any embedded +// single quotes as '\''. Values that are already safe (no special chars) +// are returned as-is for readability. 
+func shellQuoteToken(s string) string { + safe := true + for _, r := range s { + if !((r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || + (r >= '0' && r <= '9') || r == '_' || r == '-' || r == '.' || + r == '/' || r == ':' || r == '=' || r == '@' || r == '+') { + safe = false + break + } + } + if safe { + return s + } + // Single-quote with embedded ' escaped as '\'' + return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'" +} + +// VagrantEnsureGUI starts GUI services (Xvfb, fluxbox, x11vnc, xrdp) inside the VM +// if they are not already running. Idempotent — pgrep guards prevent double-start. +// Called when the cell stack includes the desktop module and GUI is enabled. +func VagrantEnsureGUI(ctx context.Context, vagrantDir string, dryRun bool) error { + // Source nix profile first so GUI binaries (Xvfb, fluxbox, x11vnc) are on PATH. + script := `. "$HOME/.nix-profile/etc/profile.d/nix.sh" 2>/dev/null || true` + + `; if ! pgrep -x Xvfb >/dev/null 2>&1; then` + + ` Xvfb :99 -screen 0 1920x1080x24 -ac +extension GLX +render -noreset &` + + ` sleep 1; fi` + + `; if ! pgrep -x fluxbox >/dev/null 2>&1; then DISPLAY=:99 fluxbox &>/dev/null & fi` + + `; if ! pgrep -x x11vnc >/dev/null 2>&1; then` + + ` DISPLAY=:99 x11vnc -display :99 -rfbport 5900 -nopw -forever -shared -quiet &>/dev/null & fi` + + `; sudo systemctl start xrdp 2>/dev/null || true` + + if dryRun { + fmt.Printf("(cd %q && vagrant ssh -- bash -l -c %s)\n", vagrantDir, shellQuoteToken(script)) + return nil + } + cmd := exec.CommandContext(ctx, "vagrant", "ssh", "--", "bash", "-l", "-c", shellQuoteToken(script)) + cmd.Dir = vagrantDir + if err := cmd.Run(); err != nil { + return fmt.Errorf("start GUI services: %w", err) + } + return nil +} + +// VagrantStatusRunning parses `vagrant status --machine-readable` output and returns +// true if the machine state is "running" (libvirt/virtualbox) or "started" (UTM). 
+//
+// Machine-readable format: timestamp,target,type,data (CSV, 4 fields)
+// We look for a record where type=="state" and data is "running" or "started".
+func VagrantStatusRunning(output string) bool {
+	r := csv.NewReader(strings.NewReader(output))
+	r.FieldsPerRecord = -1 // allow variable number of fields
+	r.LazyQuotes = true
+	for {
+		record, err := r.Read()
+		if err != nil {
+			break
+		}
+		// timestamp(0), target(1), type(2), data(3)
+		if len(record) < 4 {
+			continue
+		}
+		if record[2] == "state" && (record[3] == "running" || record[3] == "started") {
+			return true
+		}
+	}
+	return false
+}
+
+// VagrantRunningCells parses `vagrant global-status` and returns a map of
+// projectBasename → machineID for all running devcell VMs.
+// Returns a nil map when vagrant is not installed, and an empty map when no
+// devcell VMs are running — never an error.
+func VagrantRunningCells() map[string]string {
+	out, err := exec.Command("vagrant", "global-status").Output()
+	if err != nil {
+		return nil
+	}
+	return ParseVagrantGlobalStatus(string(out))
+}
+
+// ParseVagrantGlobalStatus extracts running devcell VM entries from `vagrant global-status` output.
+// Returns projectBasename → machineID for running VMs.
+//
+// Only VMs whose directory ends in ".devcell" are considered devcell cells.
+// UTM reports state as "started"; other providers use "running" — both are accepted.
+// +// Output format: +// +// id name provider state directory +// abc1234 default utm started /Users/dmitry/dev/myproject/.devcell +func ParseVagrantGlobalStatus(output string) map[string]string { + result := make(map[string]string) + for _, line := range strings.Split(output, "\n") { + fields := strings.Fields(line) + // id(0) name(1) provider(2) state(3) directory(4) + if len(fields) < 5 { + continue + } + state := fields[3] + if state != "running" && state != "started" { + continue + } + vagrantDir := fields[4] + // Only consider directories that are a .devcell folder — this is the + // devcell-specific convention; other vagrant VMs on the machine are ignored. + if filepath.Base(vagrantDir) != ".devcell" { + continue + } + // Project root is one level up from .devcell. + projectRoot := filepath.Dir(vagrantDir) + machineID := fields[0] + result[filepath.Base(projectRoot)] = machineID + } + return result +} + +// VagrantMachinePort returns the host port mapped from guestPort for the VM identified +// by machineID. Uses `vagrant port --machine-readable` — no file-system access +// needed, works regardless of where the Vagrantfile lives on disk. +func VagrantMachinePort(machineID, guestPort string) (string, bool) { + out, err := exec.Command("vagrant", "port", machineID, "--machine-readable").Output() + if err != nil { + return "", false + } + return ParseVagrantPortOutput(string(out), guestPort) +} + +// ParseVagrantPortOutput extracts the host port for guestPort from +// `vagrant port --machine-readable` output. 
+// Line format: timestamp,target,forwarded_port,guestPort,hostPort +func ParseVagrantPortOutput(output, guestPort string) (string, bool) { + needle := ",forwarded_port," + guestPort + "," + for _, line := range strings.Split(output, "\n") { + if !strings.Contains(line, needle) { + continue + } + parts := strings.SplitN(line, ",", 5) + // timestamp(0) target(1) "forwarded_port"(2) guestPort(3) hostPort(4) + if len(parts) == 5 { + return strings.TrimSpace(parts[4]), true + } + } + return "", false +} + +// VagrantReadForwardedPort reads the Vagrantfile in vagrantDir and returns the host +// port for the forwarded_port entry with the given id ("rdp" or "vnc"). +// Looks for lines of the form: +// +// config.vm.network "forwarded_port", guest: 3389, host: 36289, id: "rdp" +func VagrantReadForwardedPort(vagrantDir, portID string) (string, bool) { + data, err := os.ReadFile(filepath.Join(vagrantDir, "Vagrantfile")) + if err != nil { + return "", false + } + needle := `id: "` + portID + `"` + for _, line := range strings.Split(string(data), "\n") { + if !strings.Contains(line, needle) { + continue + } + // Extract host: + if idx := strings.Index(line, "host: "); idx != -1 { + rest := line[idx+len("host: "):] + // read digits + end := 0 + for end < len(rest) && rest[end] >= '0' && rest[end] <= '9' { + end++ + } + if end > 0 { + return rest[:end], true + } + } + } + return "", false +} + +// VagrantIsRunning checks whether the vagrant VM in vagrantDir is currently running. +// Returns false quickly when vagrantDir has no Vagrantfile (no subprocess needed). +// When vagrant CLI is unavailable, falls back to VagrantMachineCreated which checks +// whether the machine has been provisioned at least once (id file present). 
+func VagrantIsRunning(vagrantDir string) bool {
+	vagrantfile := filepath.Join(vagrantDir, "Vagrantfile")
+	if _, err := os.Stat(vagrantfile); err != nil {
+		vagrantDebug("VagrantIsRunning: no Vagrantfile at %s: %v", vagrantfile, err)
+		return false
+	}
+	vagrantDebug("VagrantIsRunning: Vagrantfile found at %s", vagrantfile)
+	out, err := vagrantOutput(context.Background(), vagrantDir, "status", "--machine-readable")
+	if err != nil {
+		vagrantDebug("VagrantIsRunning: vagrant status failed (%v) — falling back to VagrantMachineCreated", err)
+		created := VagrantMachineCreated(vagrantDir)
+		vagrantDebug("VagrantIsRunning: VagrantMachineCreated=%v", created)
+		return created
+	}
+	running := VagrantStatusRunning(out)
+	vagrantDebug("VagrantIsRunning: vagrant status output=%q running=%v", strings.TrimSpace(out), running)
+	return running
+}
+
+// VagrantMachineCreated returns true if the vagrant VM in vagrantDir has been
+// created at least once — i.e. .vagrant/machines/default/PROVIDER/id exists
+// (PROVIDER is the vagrant provider directory, e.g. "utm" or "libvirt").
+// Used as a fallback when vagrant CLI is not available in the current environment.
+func VagrantMachineCreated(vagrantDir string) bool {
+	machinesDir := filepath.Join(vagrantDir, ".vagrant", "machines", "default")
+	entries, err := os.ReadDir(machinesDir)
+	if err != nil {
+		vagrantDebug("VagrantMachineCreated: cannot read %s: %v", machinesDir, err)
+		return false
+	}
+	for _, e := range entries {
+		if !e.IsDir() {
+			continue
+		}
+		idFile := filepath.Join(machinesDir, e.Name(), "id")
+		data, err := os.ReadFile(idFile)
+		if err == nil && len(strings.TrimSpace(string(data))) > 0 {
+			vagrantDebug("VagrantMachineCreated: found id file %s (id=%s)", idFile, strings.TrimSpace(string(data)))
+			return true
+		}
+		vagrantDebug("VagrantMachineCreated: id file %s missing or empty (err=%v)", idFile, err)
+	}
+	return false
+}
+
+// vagrantDebug prints a debug line when ux.Verbose is active.
+func vagrantDebug(format string, args ...any) { + if ux.Verbose { + fmt.Fprintf(os.Stderr, "[vagrant] "+format+"\n", args...) + } +} + +// VagrantBinaryExists checks whether a binary is reachable and executable in the VM's login shell. +// Used for auto-detect: if the binary is missing, the caller should provision before running. +func VagrantBinaryExists(ctx context.Context, vagrantDir, binary string) bool { + // Explicitly source nix profile before checking — the utm/bookworm box's .bash_profile + // does not source .profile, so nix PATH additions are otherwise missing in `bash -l`. + // The [ -x ] guard rejects dangling nix-profile symlinks (broken store paths). + // Shell-quote the script so SSH passes it as a single token (see BuildVagrantSSHArgv). + script := `. "$HOME/.nix-profile/etc/profile.d/nix.sh" 2>/dev/null || true; ` + + `p=$(command -v ` + binary + ` 2>/dev/null) && [ -n "$p" ] && [ -x "$p" ]` + cmd := exec.CommandContext(ctx, "vagrant", "ssh", "--", "bash", "-l", "-c", shellQuoteToken(script)) + cmd.Dir = vagrantDir + return cmd.Run() == nil +} + +// VagrantEnsureUp brings the VM up if it is not already running. +// In dry-run mode prints the would-be command and returns. +func VagrantEnsureUp(ctx context.Context, vagrantDir, provider string, dryRun bool) error { + if dryRun { + fmt.Printf("(cd %q && vagrant up --provider=%s)\n", vagrantDir, provider) + return nil + } + // Check current status + out, err := vagrantOutput(ctx, vagrantDir, "status", "--machine-readable") + if err != nil { + // `vagrant status` can fail if the VM has never been created; that's OK — just try up. + out = "" + } + if VagrantStatusRunning(out) { + return nil // already running + } + return vagrantRunWithSpinner(ctx, vagrantDir, "Starting VM…", "up", "--provider="+provider) +} + +// VagrantProvision runs `vagrant provision` to (re-)apply the nixhome flake. +// In dry-run mode prints the would-be command and returns. 
+func VagrantProvision(ctx context.Context, vagrantDir string, dryRun bool) error { + if dryRun { + fmt.Printf("(cd %q && vagrant provision)\n", vagrantDir) + return nil + } + return vagrantRunWithSpinner(ctx, vagrantDir, "Provisioning VM…", "provision") +} + +// VagrantUploadNixhome uploads a local nixhome directory into the VM at ~/nixhome +// using `vagrant upload nixhome`. No-op when nixhomePath is empty. +// The provisioner checks $HOME/nixhome first (set by this upload), then falls back to GitHub. +func VagrantUploadNixhome(ctx context.Context, vagrantDir, nixhomePath string, dryRun bool) error { + if nixhomePath == "" { + return nil + } + if dryRun { + fmt.Printf("(cd %q && vagrant upload %s nixhome)\n", vagrantDir, nixhomePath) + return nil + } + return vagrantRunWithSpinner(ctx, vagrantDir, "Uploading nixhome…", "upload", nixhomePath, "nixhome") +} + +// vagrantOutput runs a vagrant command in vagrantDir and returns combined output. +func vagrantOutput(ctx context.Context, vagrantDir string, args ...string) (string, error) { + cmd := exec.CommandContext(ctx, "vagrant", args...) + cmd.Dir = vagrantDir + out, err := cmd.CombinedOutput() + return string(out), err +} + +// vagrantRunWithSpinner runs a vagrant command with a spinner when not in debug/verbose mode. +// In verbose mode, output streams directly to the user (same as before). In normal mode, +// a spinner is shown and output is buffered — printed only on failure. +func vagrantRunWithSpinner(ctx context.Context, vagrantDir, label string, args ...string) error { + if ux.Verbose { + return vagrantRun(ctx, vagrantDir, args...) + } + sp := ux.NewProgressSpinner(label) + var buf bytes.Buffer + cmd := exec.CommandContext(ctx, "vagrant", args...) 
+ cmd.Dir = vagrantDir + cmd.Stdout = &buf + cmd.Stderr = &buf + if err := cmd.Run(); err != nil { + sp.Fail(label) + if buf.Len() > 0 { + fmt.Fprintln(os.Stderr, buf.String()) + } + return fmt.Errorf("vagrant %s: %w", strings.Join(args, " "), err) + } + sp.Success(label) + return nil +} + +// vagrantRun runs a vagrant command in vagrantDir, streaming stdio directly to the user. +// Used when verbose output is desired (e.g. ux.Verbose is true). +func vagrantRun(ctx context.Context, vagrantDir string, args ...string) error { + cmd := exec.CommandContext(ctx, "vagrant", args...) + cmd.Dir = vagrantDir + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("vagrant %s: %w", strings.Join(args, " "), err) + } + return nil +} + diff --git a/internal/runner/vagrant_test.go b/internal/runner/vagrant_test.go new file mode 100644 index 0000000..f237271 --- /dev/null +++ b/internal/runner/vagrant_test.go @@ -0,0 +1,491 @@ +package runner_test + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/cfg" + "github.com/DimmKirr/devcell/internal/config" + "github.com/DimmKirr/devcell/internal/runner" +) + +// vagrantSpec builds a minimal VagrantSpec for testing. +func vagrantSpec(extra ...func(*runner.VagrantSpec)) runner.VagrantSpec { + spec := runner.VagrantSpec{ + Config: config.Load("/home/bob/myproject", func(k string) string { return "" }), + CellCfg: cfg.CellConfig{}, + Binary: "claude", + DefaultFlags: []string{"--dangerously-skip-permissions"}, + UserArgs: nil, + VagrantDir: "/home/bob/myproject/.devcell/vagrant", + Provider: "utm", + } + for _, fn := range extra { + fn(&spec) + } + return spec +} + +// L1: Pure unit tests — BuildVagrantSSHArgv is a pure function; no I/O. 
+ +func TestBuildVagrantSSHArgv_ContainsVagrantSSH(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + if len(argv) < 2 || argv[0] != "vagrant" || argv[1] != "ssh" { + t.Fatalf("expected argv[0..1]=[vagrant ssh], got %v", argv) + } + // VagrantDir is NOT in the argv — caller sets cmd.Dir instead. + for _, a := range argv { + if a == "--chdir" { + t.Errorf("--chdir must not appear in argv (use cmd.Dir): %v", argv) + } + } +} + +func TestBuildVagrantSSHArgv_HasLoginBashWrapper(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + // Remote command must be wrapped in bash -l -c "..." so nix profile is sourced. + joined := strings.Join(argv, " ") + if !strings.Contains(joined, "bash -l -c") { + t.Errorf("expected 'bash -l -c' (login shell) in argv: %v", argv) + } +} + +func TestBuildVagrantSSHArgv_RunsCorrectBinary(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + joined := strings.Join(argv, " ") + if !strings.Contains(joined, "claude") { + t.Errorf("expected binary 'claude' in argv: %v", argv) + } +} + +func TestBuildVagrantSSHArgv_DefaultFlagsIncluded(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + joined := strings.Join(argv, " ") + if !strings.Contains(joined, "--dangerously-skip-permissions") { + t.Errorf("expected default flags in argv: %v", argv) + } +} + +func TestBuildVagrantSSHArgv_UserArgsAppended(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.UserArgs = []string{"--model", "opus"} + }) + argv := runner.BuildVagrantSSHArgv(spec) + joined := strings.Join(argv, " ") + if !strings.Contains(joined, "--model") || !strings.Contains(joined, "opus") { + t.Errorf("expected user args in argv: %v", argv) + } +} + +func TestBuildVagrantSSHArgv_VagrantDirNotInArgv(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.VagrantDir = "/custom/vagrant/dir" + }) + argv := runner.BuildVagrantSSHArgv(spec) + joined := strings.Join(argv, " ") + // VagrantDir 
must NOT appear in argv; the caller sets cmd.Dir instead. + if strings.Contains(joined, "/custom/vagrant/dir") { + t.Errorf("VagrantDir must not appear in argv (use cmd.Dir): %v", argv) + } +} + +func TestBuildVagrantSSHArgv_NoDuplicateFlags(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + seen := map[string]int{} + for _, a := range argv { + seen[a]++ + } + for k, c := range seen { + if c > 1 && strings.HasPrefix(k, "--") { + t.Errorf("flag %q appears %d times in argv %v", k, c, argv) + } + } +} + +// L1: VagrantStatus parsing — pure function, no I/O. +// All tests use --machine-readable CSV format (timestamp,target,type,data). + +func TestVagrantStatusRunning_UTMStarted(t *testing.T) { + out := "1776348449,default,provider-name,utm\n1776348449,default,state,started\n" + if !runner.VagrantStatusRunning(out) { + t.Errorf("expected running=true for UTM started: %q", out) + } +} + +func TestVagrantStatusRunning_LibvirtRunning(t *testing.T) { + out := "1776348449,default,provider-name,libvirt\n1776348449,default,state,running\n" + if !runner.VagrantStatusRunning(out) { + t.Errorf("expected running=true for libvirt running: %q", out) + } +} + +func TestVagrantStatusRunning_PowerOff(t *testing.T) { + out := "1776348449,default,provider-name,utm\n1776348449,default,state,poweroff\n" + if runner.VagrantStatusRunning(out) { + t.Errorf("expected running=false for poweroff: %q", out) + } +} + +func TestVagrantStatusRunning_Aborted(t *testing.T) { + out := "1776348449,default,provider-name,utm\n1776348449,default,state,aborted\n" + if runner.VagrantStatusRunning(out) { + t.Errorf("expected running=false for aborted: %q", out) + } +} + +func TestVagrantStatusRunning_EmptyOutput(t *testing.T) { + if runner.VagrantStatusRunning("") { + t.Error("expected running=false for empty output") + } +} + +// L1: EnvVars injection — BuildVagrantSSHArgv is pure; no I/O. 
+ +func TestBuildVagrantSSHArgv_EnvVarsInjectedBeforeBinary(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.EnvVars = []string{"TERM=xterm-256color", "LANG=en_US.UTF-8"} + }) + argv := runner.BuildVagrantSSHArgv(spec) + + // The remote command is the last argv element (the bash -c argument). + remoteCmd := argv[len(argv)-1] + + envIdx := strings.Index(remoteCmd, "env ") + binaryIdx := strings.Index(remoteCmd, "claude") + if envIdx == -1 { + t.Fatalf("'env' not found in remote command when EnvVars set: %q", remoteCmd) + } + if binaryIdx == -1 { + t.Fatalf("'claude' not found in remote command: %q", remoteCmd) + } + if envIdx >= binaryIdx { + t.Errorf("'env' must appear before binary in remote command: %q", remoteCmd) + } +} + +func TestBuildVagrantSSHArgv_EnvVarValuesPresent(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.EnvVars = []string{"TERM=xterm-256color", "LANG=en_US.UTF-8"} + }) + argv := runner.BuildVagrantSSHArgv(spec) + joined := strings.Join(argv, " ") + if !strings.Contains(joined, "TERM=xterm-256color") { + t.Errorf("expected TERM env var in argv: %v", argv) + } + if !strings.Contains(joined, "LANG=en_US.UTF-8") { + t.Errorf("expected LANG env var in argv: %v", argv) + } +} + +func TestBuildVagrantSSHArgv_NoEnvCommandWhenNoEnvVars(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + remoteCmd := argv[len(argv)-1] + if strings.HasPrefix(remoteCmd, "env ") { + t.Errorf("remote command should not start with 'env' when no EnvVars: %q", remoteCmd) + } +} + +// L1: ProjectDir workdir — BuildVagrantSSHArgv is pure; no I/O. + +func TestBuildVagrantSSHArgv_ProjectDirCdPrefix(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.ProjectDir = "/Users/dmitry/dev/myproject" + }) + argv := runner.BuildVagrantSSHArgv(spec) + remoteCmd := argv[len(argv)-1] + // The nix profile source prefix comes first, then the cd; check both are present in order. 
+ nixIdx := strings.Index(remoteCmd, "nix-profile") + cdIdx := strings.Index(remoteCmd, "cd ~/myproject") + if cdIdx == -1 { + t.Errorf("expected remote command to contain 'cd ~/myproject', got: %q", remoteCmd) + } + if nixIdx != -1 && cdIdx < nixIdx { + t.Errorf("expected 'cd ~/myproject' to come after nix source prefix, got: %q", remoteCmd) + } +} + +func TestBuildVagrantSSHArgv_ProjectDirCdThenBinary(t *testing.T) { + spec := vagrantSpec(func(s *runner.VagrantSpec) { + s.ProjectDir = "/Users/dmitry/dev/myproject" + }) + argv := runner.BuildVagrantSSHArgv(spec) + remoteCmd := argv[len(argv)-1] + cdIdx := strings.Index(remoteCmd, "cd ~/myproject") + binaryIdx := strings.Index(remoteCmd, "claude") + if cdIdx == -1 { + t.Fatalf("cd not found in remote command: %q", remoteCmd) + } + if binaryIdx == -1 { + t.Fatalf("binary not found in remote command: %q", remoteCmd) + } + if cdIdx >= binaryIdx { + t.Errorf("cd must appear before binary: %q", remoteCmd) + } +} + +func TestBuildVagrantSSHArgv_NoProjectDirNoCd(t *testing.T) { + argv := runner.BuildVagrantSSHArgv(vagrantSpec()) + remoteCmd := argv[len(argv)-1] + if strings.Contains(remoteCmd, "cd ~/") { + t.Errorf("expected no cd prefix when ProjectDir is empty: %q", remoteCmd) + } +} + +// L2: VagrantBinaryExists — returns false when vagrant command fails (no VM). + +func TestVagrantBinaryExists_ReturnsFalseWhenVagrantFails(t *testing.T) { + // An empty temp dir has no Vagrantfile; vagrant ssh will fail → binary absent. + exists := runner.VagrantBinaryExists(context.Background(), t.TempDir(), "claude") + if exists { + t.Error("expected false when vagrant command fails (no VM)") + } +} + +// L1: VagrantIsRunning fast-path — returns false immediately when no Vagrantfile present. +// No vagrant subprocess is spawned, so this test is safe to run anywhere. + +func TestVagrantIsRunning_NoVagrantfile(t *testing.T) { + // Empty temp dir has no Vagrantfile — must return false without calling vagrant. 
+ if runner.VagrantIsRunning(t.TempDir()) { + t.Error("expected false when no Vagrantfile present") + } +} + +func TestVagrantIsRunning_EmptyDir(t *testing.T) { + if runner.VagrantIsRunning("") { + t.Error("expected false for empty dir") + } +} + +func TestVagrantIsRunning_NonExistentDir(t *testing.T) { + if runner.VagrantIsRunning("/nonexistent/path/that/cannot/exist") { + t.Error("expected false for non-existent dir") + } +} + +// L1: VagrantMachineCreated — pure file check, no subprocess. + +func TestVagrantMachineCreated_WithIDFile(t *testing.T) { + dir := t.TempDir() + idPath := filepath.Join(dir, ".vagrant", "machines", "default", "utm", "id") + if err := os.MkdirAll(filepath.Dir(idPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(idPath, []byte("E0F4A259-5FCA-4B08-BD30-6A707B1D35B6"), 0o644); err != nil { + t.Fatal(err) + } + if !runner.VagrantMachineCreated(dir) { + t.Error("expected true when id file exists with content") + } +} + +func TestVagrantMachineCreated_EmptyIDFile(t *testing.T) { + dir := t.TempDir() + idPath := filepath.Join(dir, ".vagrant", "machines", "default", "utm", "id") + if err := os.MkdirAll(filepath.Dir(idPath), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(idPath, []byte(""), 0o644); err != nil { + t.Fatal(err) + } + if runner.VagrantMachineCreated(dir) { + t.Error("expected false when id file is empty (VM destroyed)") + } +} + +func TestVagrantMachineCreated_NoMachinesDir(t *testing.T) { + if runner.VagrantMachineCreated(t.TempDir()) { + t.Error("expected false when no .vagrant/machines dir") + } +} + +// L1: ParseVagrantGlobalStatus — pure function, no I/O. + +func TestParseVagrantGlobalStatus_RunningVM(t *testing.T) { + // UTM uses "started" state, not "running". 
+ output := `id name provider state directory +------------------------------------------------------- +abc1234 default utm started /Users/dmitry/dev/devcell/.devcell +` + result := runner.ParseVagrantGlobalStatus(output) + if len(result) != 1 { + t.Fatalf("want 1 entry, got %d: %v", len(result), result) + } + machineID, ok := result["devcell"] + if !ok { + t.Fatalf("want key 'devcell', got %v", result) + } + if machineID != "abc1234" { + t.Errorf("want machineID 'abc1234', got %q", machineID) + } +} + +func TestParseVagrantGlobalStatus_AcceptsRunningState(t *testing.T) { + // Non-UTM providers (parallels, qemu) report "running". + output := `id name provider state directory +------------------------------------------------------- +abc1234 default parallels running /Users/dmitry/dev/proj1/.devcell +` + result := runner.ParseVagrantGlobalStatus(output) + if len(result) != 1 { + t.Fatalf("want 1 entry for 'running' state, got %d: %v", len(result), result) + } +} + +func TestParseVagrantGlobalStatus_SkipsStopped(t *testing.T) { + output := `id name provider state directory +------------------------------------------------------- +abc1234 default utm started /Users/dmitry/dev/proj1/.devcell +def5678 default utm stopped /Users/dmitry/dev/proj2/.devcell +` + result := runner.ParseVagrantGlobalStatus(output) + if len(result) != 1 { + t.Fatalf("want only started VMs, got %d: %v", len(result), result) + } + if id, ok := result["proj1"]; !ok { + t.Errorf("expected 'proj1' in result, got %v", result) + } else if id != "abc1234" { + t.Errorf("want machineID 'abc1234', got %q", id) + } +} + +func TestParseVagrantGlobalStatus_SkipsNonDevcellDirs(t *testing.T) { + // VMs not in a .devcell directory should be ignored (not devcell cells). 
+ output := `id name provider state directory +------------------------------------------------------- +abc1234 default parallels running /Users/dmitry/dev/claudelibs +def5678 default qemu running /Users/dmitry/dev/devcell/test/vagrant +ghi9012 default utm started /Users/dmitry/dev/myproject/.devcell +` + result := runner.ParseVagrantGlobalStatus(output) + if len(result) != 1 { + t.Fatalf("want only .devcell VMs, got %d: %v", len(result), result) + } + if _, ok := result["myproject"]; !ok { + t.Errorf("expected 'myproject', got %v", result) + } +} + +func TestParseVagrantGlobalStatus_MultipleRunning(t *testing.T) { + output := `id name provider state directory +------------------------------------------------------- +abc1234 default utm started /Users/dmitry/dev/proj1/.devcell +def5678 default libvirt running /home/user/proj2/.devcell +` + result := runner.ParseVagrantGlobalStatus(output) + if len(result) != 2 { + t.Fatalf("want 2 running VMs, got %d: %v", len(result), result) + } +} + +func TestParseVagrantGlobalStatus_Empty(t *testing.T) { + result := runner.ParseVagrantGlobalStatus("") + if len(result) != 0 { + t.Errorf("want empty map for empty output, got %v", result) + } +} + +// L1: VagrantReadForwardedPort — pure file read, no subprocess. 
+ +func TestVagrantReadForwardedPort_RDP(t *testing.T) { + dir := t.TempDir() + content := `Vagrant.configure("2") do |config| + config.vm.network "forwarded_port", guest: 5900, host: 40550, id: "vnc" + config.vm.network "forwarded_port", guest: 3389, host: 36289, id: "rdp" +end +` + if err := os.WriteFile(dir+"/Vagrantfile", []byte(content), 0o644); err != nil { + t.Fatal(err) + } + port, ok := runner.VagrantReadForwardedPort(dir, "rdp") + if !ok { + t.Fatal("expected rdp port to be found") + } + if port != "36289" { + t.Errorf("want 36289, got %q", port) + } +} + +func TestVagrantReadForwardedPort_VNC(t *testing.T) { + dir := t.TempDir() + content := `Vagrant.configure("2") do |config| + config.vm.network "forwarded_port", guest: 5900, host: 40550, id: "vnc" + config.vm.network "forwarded_port", guest: 3389, host: 36289, id: "rdp" +end +` + if err := os.WriteFile(dir+"/Vagrantfile", []byte(content), 0o644); err != nil { + t.Fatal(err) + } + port, ok := runner.VagrantReadForwardedPort(dir, "vnc") + if !ok { + t.Fatal("expected vnc port to be found") + } + if port != "40550" { + t.Errorf("want 40550, got %q", port) + } +} + +func TestVagrantReadForwardedPort_MissingID(t *testing.T) { + dir := t.TempDir() + content := `Vagrant.configure("2") do |config| + config.vm.network "forwarded_port", guest: 5900, host: 40550, id: "vnc" +end +` + if err := os.WriteFile(dir+"/Vagrantfile", []byte(content), 0o644); err != nil { + t.Fatal(err) + } + _, ok := runner.VagrantReadForwardedPort(dir, "rdp") + if ok { + t.Error("expected false when id not present") + } +} + +func TestVagrantReadForwardedPort_NoVagrantfile(t *testing.T) { + _, ok := runner.VagrantReadForwardedPort(t.TempDir(), "rdp") + if ok { + t.Error("expected false when no Vagrantfile") + } +} + +// L1: VagrantMachinePort parsing — pure, tests ParseVagrantMachineReadable directly. 
+ +func TestParseVagrantMachineReadable_RDP(t *testing.T) { + output := `1699999999,,metadata,machine-count,1 +1699999999,default,forwarded_port,22,2222 +1699999999,default,forwarded_port,3389,36289 +1699999999,default,forwarded_port,5900,40550 +` + port, ok := runner.ParseVagrantPortOutput(output, "3389") + if !ok { + t.Fatal("expected rdp port to be found") + } + if port != "36289" { + t.Errorf("want 36289, got %q", port) + } +} + +func TestParseVagrantMachineReadable_VNC(t *testing.T) { + output := `1699999999,default,forwarded_port,22,2222 +1699999999,default,forwarded_port,3389,36289 +1699999999,default,forwarded_port,5900,40550 +` + port, ok := runner.ParseVagrantPortOutput(output, "5900") + if !ok { + t.Fatal("expected vnc port to be found") + } + if port != "40550" { + t.Errorf("want 40550, got %q", port) + } +} + +func TestParseVagrantMachineReadable_MissingPort(t *testing.T) { + output := `1699999999,default,forwarded_port,22,2222 +` + _, ok := runner.ParseVagrantPortOutput(output, "3389") + if ok { + t.Error("expected false when port not in output") + } +} diff --git a/internal/scaffold/scaffold.go b/internal/scaffold/scaffold.go index 32f0c7b..ccd37bc 100644 --- a/internal/scaffold/scaffold.go +++ b/internal/scaffold/scaffold.go @@ -27,6 +27,9 @@ var starshipTomlContent []byte //go:embed templates/Vagrantfile.tmpl var vagrantfileContent []byte +//go:embed templates/Vagrantfile.linux.tmpl +var LinuxVagrantfileContent []byte + type scaffoldFile struct { name string content []byte @@ -633,6 +636,54 @@ func statErr(path string) error { return err } +// dirModules is the set of nixhome module names that are directories (not .nix files). +// Used when the nixhome source is not available locally for filesystem inspection. +var dirModules = map[string]bool{"desktop": true, "llm": true, "scraping": true} + +// moduleImportPath returns the nix import path for a module relative to hosts/linux/. 
+// Checks the actual filesystem when nixhomeDir is available, otherwise uses dirModules. +func moduleImportPath(nixhomeDir, name string) string { + if nixhomeDir != "" { + p := filepath.Join(nixhomeDir, "modules", name) + if fi, err := os.Stat(p); err == nil && fi.IsDir() { + return "../../modules/" + name + } + return "../../modules/" + name + ".nix" + } + if dirModules[name] { + return "../../modules/" + name + } + return "../../modules/" + name + ".nix" +} + +// ScaffoldVagrantLinuxStack generates hosts/linux/stack.nix inside nixhomeDir +// to reflect the current stack + extra modules from .devcell.toml. +// Always overwrites — this file is generated before each nixhome upload. +// No-op when nixhomeDir is empty (GitHub fallback: default stack.nix from repo). +func ScaffoldVagrantLinuxStack(nixhomeDir, stack string, modules []string) error { + if nixhomeDir == "" { + return nil + } + if stack == "" { + stack = "base" + } + dest := filepath.Join(nixhomeDir, "hosts", "linux", "stack.nix") + if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil { + return fmt.Errorf("mkdir hosts/linux: %w", err) + } + + var sb strings.Builder + sb.WriteString("# Generated by cell — do not edit. Stack: " + stack + "\n") + sb.WriteString("{ ... }: {\n imports = [\n") + sb.WriteString(" ../../stacks/" + stack + ".nix\n") + for _, m := range modules { + sb.WriteString(" " + moduleImportPath(nixhomeDir, m) + "\n") + } + sb.WriteString(" ];\n}\n") + + return os.WriteFile(dest, []byte(sb.String()), 0644) +} + // IsInitialized returns true when .devcell.toml exists in dir. func IsInitialized(dir string) bool { _, err := os.Stat(filepath.Join(dir, ".devcell.toml")) @@ -659,3 +710,50 @@ func ScaffoldVagrantfile(dir, vagrantBox, nixhomePath string) error { } return nil } + +// ScaffoldLinuxVagrantfile writes a Linux Vagrantfile (Debian ARM64 + Nix) to dir, +// substituting all template placeholders from the provided arguments. +// hostHome is the host user's home directory (e.g. 
/home/dmitry) used to +// locate ~/.claude/ directories. configDir is the devcell config directory +// (e.g. ~/.config/devcell) shared into /etc/devcell/config inside the VM. +// Skips writing if a Vagrantfile already exists (idempotent). +func ScaffoldLinuxVagrantfile(dir, vagrantBox, provider, stack, projectDir, nixhomeDir, vncPort, rdpPort, hostHome, configDir string) error { + dest := filepath.Join(dir, "Vagrantfile") + // Strip leading zeros from port numbers — Ruby interprets 0NNN as octal. + vncPort = strings.TrimLeft(vncPort, "0") + if vncPort == "" { + vncPort = "0" + } + rdpPort = strings.TrimLeft(rdpPort, "0") + if rdpPort == "" { + rdpPort = "0" + } + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("mkdir %s: %w", dir, err) + } + // VM hostname must not start with a dot or hyphen (e.g. dir is ".devcell"). + vmName := strings.TrimLeft(filepath.Base(dir), ".-") + if vmName == "" { + vmName = "devcell" + } + guiEnabled := "false" + switch stack { + case "ultimate", "electronics": + guiEnabled = "true" + } + content := bytes.ReplaceAll(LinuxVagrantfileContent, []byte("{{VAGRANT_BOX}}"), []byte(vagrantBox)) + content = bytes.ReplaceAll(content, []byte("{{VAGRANT_PROVIDER}}"), []byte(provider)) + content = bytes.ReplaceAll(content, []byte("{{VM_NAME}}"), []byte(vmName)) + content = bytes.ReplaceAll(content, []byte("{{PROJECT_DIR}}"), []byte(projectDir)) + content = bytes.ReplaceAll(content, []byte("{{NIXHOME_DIR}}"), []byte(nixhomeDir)) + content = bytes.ReplaceAll(content, []byte("{{STACK}}"), []byte(stack)) + content = bytes.ReplaceAll(content, []byte("{{VNC_PORT}}"), []byte(vncPort)) + content = bytes.ReplaceAll(content, []byte("{{RDP_PORT}}"), []byte(rdpPort)) + content = bytes.ReplaceAll(content, []byte("{{HOST_HOME}}"), []byte(hostHome)) + content = bytes.ReplaceAll(content, []byte("{{CONFIG_DIR}}"), []byte(configDir)) + content = bytes.ReplaceAll(content, []byte("{{GUI_ENABLED}}"), []byte(guiEnabled)) + if err := os.WriteFile(dest, 
content, 0644); err != nil { + return fmt.Errorf("write Vagrantfile: %w", err) + } + return nil +} diff --git a/internal/scaffold/scaffold_test.go b/internal/scaffold/scaffold_test.go index 10c4e50..1fb8e58 100644 --- a/internal/scaffold/scaffold_test.go +++ b/internal/scaffold/scaffold_test.go @@ -72,8 +72,8 @@ func TestScaffold_DefaultBaseImageIsRemote(t *testing.T) { if strings.Contains(tag, "-local") { t.Errorf("default base image must not be a local tag: %s", tag) } - if !strings.HasPrefix(tag, "ghcr.io/dimmkirr/devcell:") { - t.Errorf("default base image must be from ghcr.io registry: %s", tag) + if !strings.HasPrefix(tag, "public.ecr.aws/w1l3v2k8/devcell:") { + t.Errorf("default base image must be from ECR registry: %s", tag) } } @@ -874,7 +874,7 @@ func TestRegenerateBuildContext_BaseStackUsesCore(t *testing.T) { } df, _ := os.ReadFile(filepath.Join(dir, "Dockerfile")) - if !strings.HasPrefix(string(df), "FROM ghcr.io/dimmkirr/devcell:v0.0.0-core") { + if !strings.HasPrefix(string(df), "FROM public.ecr.aws/w1l3v2k8/devcell:v0.0.0-core") { t.Errorf("base stack should use core image, got:\n%s", strings.SplitN(string(df), "\n", 2)[0]) } } @@ -920,7 +920,7 @@ func TestRegenerateBuildContext_NonBaseStackFallsBackToCore(t *testing.T) { // In test env, docker images aren't available — should fall back to core. 
df, _ := os.ReadFile(filepath.Join(dir, "Dockerfile")) fromLine := strings.SplitN(string(df), "\n", 2)[0] - if !strings.HasPrefix(fromLine, "FROM ghcr.io/dimmkirr/devcell:v0.0.0-core") { + if !strings.HasPrefix(fromLine, "FROM public.ecr.aws/w1l3v2k8/devcell:v0.0.0-core") { t.Errorf("should fall back to core when pre-built not available, got:\n%s", fromLine) } } diff --git a/internal/scaffold/templates/Vagrantfile.linux.tmpl b/internal/scaffold/templates/Vagrantfile.linux.tmpl new file mode 100644 index 0000000..232ad52 --- /dev/null +++ b/internal/scaffold/templates/Vagrantfile.linux.tmpl @@ -0,0 +1,287 @@ +# -*- mode: ruby -*- +# frozen_string_literal: true +# +# devcell Linux VM — Debian ARM64 + Nix home-manager +# Provider: utm (Apple Silicon Mac) or libvirt/KVM (Linux host) +# +# Provisioning (runs once on first `vagrant up`): +# 1. Install Nix (no-daemon mode) +# 2. Apply home-manager flake: vagrant-linux (or vagrant-linux-aarch64 on ARM) +# +# Usage: +# vagrant up --provider={{VAGRANT_PROVIDER}} +# vagrant ssh -- -t claude +# +# Disk resize (UTM): +# The trigger below resizes the QEMU disk to DISK_GB before each `vagrant up`. +# On first run the disk image does not exist yet, so resize is skipped. +# After the VM is first created, run `vagrant halt && vagrant up` once so the +# trigger can resize the disk before the in-VM provisioner grows the filesystem. +# Requires qemu-img on the host: brew install qemu + +DISK_GB = 30 + +Vagrant.configure("2") do |config| + config.vm.box = ENV["VAGRANT_BOX"] || "{{VAGRANT_BOX}}" + config.vm.hostname = "{{VM_NAME}}" + + # Disable ALL Vagrant synced folders — vagrant-utm scans every synced_folder + # entry (regardless of type or placement) and shows a "MANUAL STEP REQUIRED: + # VirtioFS" prompt for each one. Syncing is handled by the post-up trigger below. 
+ config.vm.synced_folder ".", "/vagrant", disabled: true + + # GUI ports (VNC + RDP) — forwarded to host for cell vnc / cell rdp + config.vm.network "forwarded_port", guest: 5900, host: {{VNC_PORT}}, id: "vnc" + config.vm.network "forwarded_port", guest: 3389, host: {{RDP_PORT}}, id: "rdp" + + # ── UTM disk resize trigger (host-side, runs before every `vagrant up`) ──── + # Resizes the QEMU image to DISK_GB. No-op if disk is already >= DISK_GB. + # Skipped silently if the .qcow2 file does not exist yet (first boot). + config.trigger.before :up do |trigger| + trigger.name = "Resize UTM disk to #{DISK_GB}G" + trigger.ruby do |env, machine| + next unless machine.provider_name.to_s == "utm" + vm_name = "{{VM_NAME}}" + utm_data = File.expand_path( + "~/Library/Containers/com.utmapp.UTM/Data/Documents/#{vm_name}.utm/Data" + ) + qcow2 = Dir.glob("#{utm_data}/*.qcow2").first + unless qcow2 + machine.ui.info("disk resize: no .qcow2 found yet — skipping (run `vagrant halt && vagrant up` after first boot)") + next + end + qemu_img = %w[ + /opt/homebrew/bin/qemu-img + /usr/local/bin/qemu-img + /Applications/UTM.app/Contents/MacOS/qemu-img + ].find { |p| File.executable?(p) } + unless qemu_img + machine.ui.warn("disk resize: qemu-img not found — install with: brew install qemu") + next + end + machine.ui.info("disk resize: resizing #{File.basename(qcow2)} to #{DISK_GB}G") + system(qemu_img, "resize", qcow2, "#{DISK_GB}G") + end + end + + # ── Post-up rsync trigger — mirrors Docker volume mounts via SSH ───────── + # Syncs host directories into the VM after every `vagrant up`, replacing + # the synced_folder mechanism (which triggers VirtioFS prompts in vagrant-utm). 
+ # Mirrors the volume set that Docker's BuildArgv mounts: + # PROJECT_DIR → /home/vagrant/ + # HOST_HOME/.claude/commands → /home/vagrant/.claude/commands + # HOST_HOME/.claude/agents → /home/vagrant/.claude/agents + # HOST_HOME/.claude/skills → /home/vagrant/.claude/skills + # CONFIG_DIR → /home/vagrant/.config/devcell + config.trigger.after :up do |trigger| + trigger.name = "Sync host directories to VM (rsync over SSH)" + trigger.ruby do |env, machine| + ssh = machine.ssh_info + next unless ssh + + ssh_flags = [ + "-p", ssh[:port].to_s, + "-i", ssh[:private_key_path].first, + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "-o", "LogLevel=ERROR" + ] + ssh_cmd = "ssh #{ssh_flags.join(' ')}" + user_host = "#{ssh[:username]}@#{ssh[:host]}" + + project_dir = "{{PROJECT_DIR}}" + project_dest = "/home/vagrant/" + File.basename(project_dir) + + syncs = [ + { src: project_dir + "/", dest: project_dest + "/", args: ["--exclude=.devcell/nixhome/"] }, + { src: "{{HOST_HOME}}/.claude/commands/", dest: "/home/vagrant/.claude/commands/", args: ["--delete"] }, + { src: "{{HOST_HOME}}/.claude/agents/", dest: "/home/vagrant/.claude/agents/", args: ["--delete"] }, + { src: "{{HOST_HOME}}/.claude/skills/", dest: "/home/vagrant/.claude/skills/", args: ["--delete"] }, + { src: "{{CONFIG_DIR}}/", dest: "/home/vagrant/.config/devcell/", args: ["--delete"] }, + ] + + syncs.each do |s| + next unless File.exist?(s[:src].chomp("/")) + machine.ui.info("sync: #{s[:src]} → #{user_host}:#{s[:dest]}") + system("ssh", *ssh_flags, user_host, "mkdir -p #{s[:dest]}") + system("rsync", "-az", "-e", ssh_cmd, *s[:args], s[:src], "#{user_host}:#{s[:dest]}") + end + end + end + + # ── UTM provider (Apple Silicon Mac — vagrant-utm plugin required) ──────── + config.vm.provider "utm" do |utm| + utm.name = "{{VM_NAME}}" + utm.memory = 8192 + utm.cpus = 4 + end + + # ── libvirt provider (Linux host with KVM — vagrant-libvirt plugin required) + config.vm.provider "libvirt" do 
|libvirt| + libvirt.driver = "kvm" + libvirt.memory = 8192 + libvirt.cpus = 4 + libvirt.machine_virtual_size = DISK_GB + end + + # ── Disk expand provisioner (runs every `vagrant up`) ──────────────────── + # Grows the root partition and ext4 filesystem to fill any newly available + # disk space after the host-side qemu-img resize. Both growpart and resize2fs + # are idempotent — they exit cleanly when there is nothing to do. + config.vm.provision "disk-expand", type: "shell", run: "always", privileged: true, inline: <<~SHELL + set -euo pipefail + if ! command -v growpart >/dev/null 2>&1; then + apt-get install -y -q cloud-guest-utils 2>/dev/null || true + fi + ROOT_DEV=$(findmnt -n -o SOURCE / 2>/dev/null || echo "") + if [ -z "$ROOT_DEV" ]; then exit 0; fi + # e.g. /dev/vda1 → disk=vda, part=1 + DISK=$(lsblk -no PKNAME "$ROOT_DEV" 2>/dev/null || true) + PART=$(echo "$ROOT_DEV" | grep -o '[0-9]*$' || true) + if [ -n "$DISK" ] && [ -n "$PART" ]; then + growpart "/dev/$DISK" "$PART" 2>/dev/null || true + resize2fs "$ROOT_DEV" 2>/dev/null || true + fi + SHELL + + # ── /etc/devcell/config symlink (mirrors the Docker /etc/devcell/config mount) + # The post-up rsync trigger writes to /home/vagrant/.config/devcell. + # Create /etc/devcell/config as a symlink so tools that read /etc/devcell/config work. + config.vm.provision "devcell-config", type: "shell", run: "always", privileged: true, inline: <<~SHELL + mkdir -p /etc/devcell + if [ ! -L /etc/devcell/config ]; then + ln -sfT /home/vagrant/.config/devcell /etc/devcell/config + fi + # Generate en_US.UTF-8 locale if missing (suppresses bash setlocale warnings) + if ! locale -a 2>/dev/null | grep -q "en_US.utf8"; then + apt-get install -y -q locales 2>/dev/null || true + locale-gen en_US.UTF-8 2>/dev/null || true + fi + SHELL + + # ── devcell-init service — mirrors Docker's entrypoint fragment mechanism ── + # Creates /usr/local/bin/devcell-init and a systemd unit that runs it at boot. 
+ # The script sources all fragments from /etc/devcell/entrypoint.d/ (installed + # by home-manager's stageEntrypoints activation script) with vagrant-appropriate + # env vars: DEVCELL_HOME=/home/vagrant, HOST_USER=vagrant, etc. + # + # Vagrant env → Docker env mapping: + # DEVCELL_HOME=/home/vagrant ← /opt/devcell (home-manager files land in $HOME) + # HOST_USER=vagrant ← $HOST_USER + # DEVCELL_GUI_ENABLED ← set from stack ({{GUI_ENABLED}}) + # gosu() shim via runuser ← /usr/sbin/gosu binary + config.vm.provision "devcell-init", type: "shell", run: "always", privileged: true, inline: <<~SHELL + set -euo pipefail + + cat > /usr/local/bin/devcell-init << 'INITSCRIPT' +#!/bin/bash +# DevCell init — sources entrypoint fragments at VM boot. +# Mirrors images/entrypoint.sh with vagrant-appropriate env vars. +export HOST_USER=vagrant +export HOME=/home/vagrant +export USER=vagrant +export DEVCELL_HOME=/home/vagrant +export APP_NAME={{VM_NAME}} +export DEVCELL_GUI_ENABLED={{GUI_ENABLED}} + +_T0=$(($(date +%s%N) / 1000000)) +log() { + local _ms=$(( $(date +%s%N) / 1000000 - _T0 )) + printf '[devcell-init %d.%03ds] %s\n' $((_ms/1000)) $((_ms%1000)) "$*" +} + +# gosu shim — vagrant uses runuser instead of the gosu binary +gosu() { local _u="$1"; shift; runuser -u "$_u" -- "$@"; } + +log "start (app={{VM_NAME}} gui={{GUI_ENABLED}})" + +if [ -d /etc/devcell/entrypoint.d ]; then + for f in /etc/devcell/entrypoint.d/*.sh; do + [ -x "$f" ] || continue + log "sourcing $(basename $f)" + . 
"$f" || log "⚠ fragment failed: $(basename $f) (exit $?)" + done +fi + +log "done" +INITSCRIPT + chmod +x /usr/local/bin/devcell-init + + cat > /etc/systemd/system/devcell-init.service << 'UNIT' +[Unit] +Description=DevCell initialization (entrypoint fragments) +After=network.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/bin/devcell-init +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target +UNIT + + systemctl daemon-reload + systemctl enable devcell-init + systemctl restart devcell-init || true + SHELL + + # ── Nix + home-manager provisioner ─────────────────────────────────────── + # Runs once on first `vagrant up` (run: "once"). + # Use `vagrant up --provision` or `cell build --engine=vagrant` to re-run. + # Stack: {{STACK}} — reflected in nixhome/hosts/linux/stack.nix at provision time. + # + # nixhome source priority: + # 1. ~/nixhome — uploaded by 'cell build --engine=vagrant' + # 2. /opt/nixhome — rsync'd by libvirt (fast, uses local nixhome/) + # 3. GitHub — fetched when nixhome not present locally + config.vm.provision "nix", type: "shell", run: "once", privileged: false, inline: <<~SHELL + set -euo pipefail + + # Install Nix (no-daemon, single-user) if not already present. + # --no-channel-add skips `nix-channel --update` which can hang the SSH session. + # Channels are unused — we use flakes exclusively. + if ! command -v nix >/dev/null 2>&1; then + curl -sSfL https://nixos.org/nix/install | sh -s -- --no-daemon --no-channel-add + fi + + # Source Nix environment + if [ -f "$HOME/.nix-profile/etc/profile.d/nix.sh" ]; then + . "$HOME/.nix-profile/etc/profile.d/nix.sh" + fi + + # Enable flakes + mkdir -p "$HOME/.config/nix" + echo "experimental-features = nix-command flakes" > "$HOME/.config/nix/nix.conf" + + # Resolve nixhome: uploaded by 'cell build' to ~/nixhome, or GitHub fallback. 
+ if [ -d "$HOME/nixhome" ]; then + NIXHOME_FLAKE="$HOME/nixhome" + elif [ -d /opt/nixhome ]; then + NIXHOME_FLAKE="/opt/nixhome" + else + NIXHOME_FLAKE="github:DimmKirr/devcell?dir=nixhome" + echo "devcell: nixhome not found locally — fetching from GitHub" + fi + + # Select flake target based on host architecture. + # aarch64-linux targets use the "-aarch64" suffix in the nixhome flake. + ARCH=$(uname -m) + if [ "$ARCH" = "aarch64" ]; then + ARCH_SUFFIX="-aarch64" + else + ARCH_SUFFIX="" + fi + + # Apply home-manager configuration for the Linux vagrant VM. + # Uses vagrant-linux config (username=vagrant, homeDirectory=/home/vagrant). + # Customize .devcell/nixhome/hosts/linux/home.nix to add more modules. + nix run home-manager/release-25.11 -- switch \ + --flake "${NIXHOME_FLAKE}#vagrant-linux${ARCH_SUFFIX}" \ + --show-trace + + echo "devcell: home-manager switch complete (config: vagrant-linux${ARCH_SUFFIX}, flake: ${NIXHOME_FLAKE})" + SHELL +end diff --git a/internal/scaffold/vagrant_linux_test.go b/internal/scaffold/vagrant_linux_test.go new file mode 100644 index 0000000..4384495 --- /dev/null +++ b/internal/scaffold/vagrant_linux_test.go @@ -0,0 +1,165 @@ +package scaffold_test + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/DimmKirr/devcell/internal/scaffold" +) + +// L1: Pure template content tests — check raw template bytes contain required strings. +// No file I/O beyond reading the embedded template. 
+ +func TestLinuxVagrantfileTemplate_ContainsAllPlaceholders(t *testing.T) { + placeholders := []string{ + "{{VAGRANT_BOX}}", + "{{VM_NAME}}", + "{{PROJECT_DIR}}", + "{{STACK}}", + "{{VNC_PORT}}", + "{{RDP_PORT}}", + "{{HOST_HOME}}", + "{{CONFIG_DIR}}", + } + tmpl := string(scaffold.LinuxVagrantfileContent) + for _, ph := range placeholders { + if !strings.Contains(tmpl, ph) { + t.Errorf("template missing placeholder %q", ph) + } + } +} + +func TestLinuxVagrantfileTemplate_ContainsBothProviders(t *testing.T) { + tmpl := string(scaffold.LinuxVagrantfileContent) + for _, provider := range []string{"utm", "libvirt"} { + if !strings.Contains(tmpl, provider) { + t.Errorf("template missing provider block %q", provider) + } + } +} + +func TestLinuxVagrantfileTemplate_NixProvisionerRunsOnce(t *testing.T) { + tmpl := string(scaffold.LinuxVagrantfileContent) + if !strings.Contains(tmpl, `run: "once"`) { + t.Error(`template missing nix provisioner run: "once" marker`) + } + if !strings.Contains(tmpl, "--flake \"${NIXHOME_FLAKE}#vagrant-") { + t.Error("template missing home-manager --flake ${NIXHOME_FLAKE}#vagrant- command") + } +} + +func TestLinuxVagrantfileTemplate_ContainsClaudeSyncedFolders(t *testing.T) { + tmpl := string(scaffold.LinuxVagrantfileContent) + for _, path := range []string{".claude/commands", ".claude/agents", ".claude/skills"} { + if !strings.Contains(tmpl, path) { + t.Errorf("template must contain %q synced folder", path) + } + } +} + +func TestLinuxVagrantfileTemplate_ContainsEtcDevcellConfig(t *testing.T) { + tmpl := string(scaffold.LinuxVagrantfileContent) + if !strings.Contains(tmpl, "/etc/devcell/config") { + t.Error("template must contain /etc/devcell/config synced folder target") + } +} + +// L2: File I/O tests — ScaffoldLinuxVagrantfile writes correct content to disk. +// Safe in container: only uses t.TempDir(), no external processes. 
+ +func TestScaffoldLinuxVagrantfile_CreatesFile(t *testing.T) { + dir := t.TempDir() + err := scaffold.ScaffoldLinuxVagrantfile(dir, "debian/bookworm64", "utm", "ultimate", "/proj", "/nixhome", "5900", "3389", "/home/bob", "/home/bob/.config/devcell") + if err != nil { + t.Fatalf("ScaffoldLinuxVagrantfile: %v", err) + } + if _, err := os.Stat(filepath.Join(dir, "Vagrantfile")); err != nil { + t.Errorf("Vagrantfile not created: %v", err) + } +} + +func TestScaffoldLinuxVagrantfile_SubstitutesAllValues(t *testing.T) { + dir := t.TempDir() + if err := scaffold.ScaffoldLinuxVagrantfile(dir, "debian/bookworm64", "utm", "ultimate", "/proj", "/nixhome", "5900", "3389", "/home/bob", "/home/bob/.config/devcell"); err != nil { + t.Fatal(err) + } + data, _ := os.ReadFile(filepath.Join(dir, "Vagrantfile")) + content := string(data) + + for _, want := range []string{"debian/bookworm64", "ultimate", "/proj", "5900", "3389"} { + if !strings.Contains(content, want) { + t.Errorf("Vagrantfile missing %q", want) + } + } + // GUI_ENABLED=true for ultimate stack + if !strings.Contains(content, "DEVCELL_GUI_ENABLED=true") { + t.Error("Vagrantfile missing DEVCELL_GUI_ENABLED=true for ultimate stack") + } + // No raw placeholders must remain + for _, ph := range []string{"{{VAGRANT_BOX}}", "{{VM_NAME}}", "{{PROJECT_DIR}}", "{{STACK}}", "{{VNC_PORT}}", "{{RDP_PORT}}", "{{GUI_ENABLED}}"} { + if strings.Contains(content, ph) { + t.Errorf("Vagrantfile still contains unsubstituted placeholder %q", ph) + } + } +} + +func TestScaffoldLinuxVagrantfile_AlwaysRegenerates(t *testing.T) { + dir := t.TempDir() + if err := scaffold.ScaffoldLinuxVagrantfile(dir, "debian/bookworm64", "utm", "ultimate", "/proj", "/nixhome", "5900", "3389", "/home/bob", "/home/bob/.config/devcell"); err != nil { + t.Fatal(err) + } + // Second call with different params must overwrite — Vagrantfile is a generated artifact. 
+ if err := scaffold.ScaffoldLinuxVagrantfile(dir, "other-box", "kvm", "base", "/other", "/other", "5901", "3390", "/home/other", "/home/other/.config/devcell"); err != nil { + t.Fatal(err) + } + data, _ := os.ReadFile(filepath.Join(dir, "Vagrantfile")) + content := string(data) + if !strings.Contains(content, "other-box") { + t.Error("ScaffoldLinuxVagrantfile did not overwrite existing Vagrantfile with new params") + } + if strings.Contains(content, "debian/bookworm64") { + t.Error("ScaffoldLinuxVagrantfile left stale content from first call") + } +} + +func TestScaffoldLinuxVagrantfile_CustomBox(t *testing.T) { + dir := t.TempDir() + if err := scaffold.ScaffoldLinuxVagrantfile(dir, "my-custom-box", "utm", "go", "/proj", "/nixhome", "5900", "3389", "/home/bob", "/home/bob/.config/devcell"); err != nil { + t.Fatal(err) + } + data, _ := os.ReadFile(filepath.Join(dir, "Vagrantfile")) + if !strings.Contains(string(data), "my-custom-box") { + t.Error("Vagrantfile missing custom box name") + } +} + +func TestScaffoldLinuxVagrantfile_HostHomeSubstituted(t *testing.T) { + dir := t.TempDir() + if err := scaffold.ScaffoldLinuxVagrantfile(dir, "utm/bookworm", "utm", "go", "/home/bob/project", "/home/bob/nixhome", "5900", "3389", "/home/bob", "/home/bob/.config/devcell"); err != nil { + t.Fatalf("ScaffoldLinuxVagrantfile: %v", err) + } + data, _ := os.ReadFile(filepath.Join(dir, "Vagrantfile")) + content := string(data) + if strings.Contains(content, "{{HOST_HOME}}") { + t.Error("{{HOST_HOME}} not substituted in Vagrantfile") + } + if strings.Contains(content, "{{CONFIG_DIR}}") { + t.Error("{{CONFIG_DIR}} not substituted in Vagrantfile") + } + if !strings.Contains(content, "/home/bob/.claude") { + t.Error("expected /home/bob/.claude in Vagrantfile after substitution") + } +} + +func TestScaffoldLinuxVagrantfile_KVMProvider(t *testing.T) { + dir := t.TempDir() + if err := scaffold.ScaffoldLinuxVagrantfile(dir, "debian/bookworm64", "kvm", "ultimate", "/proj", "/nixhome", 
"5900", "3389", "/home/bob", "/home/bob/.config/devcell"); err != nil { + t.Fatal(err) + } + data, _ := os.ReadFile(filepath.Join(dir, "Vagrantfile")) + if !strings.Contains(string(data), "kvm") { + t.Error("Vagrantfile missing kvm provider reference") + } +} diff --git a/internal/ux/build_errors.go b/internal/ux/build_errors.go new file mode 100644 index 0000000..a15366b --- /dev/null +++ b/internal/ux/build_errors.go @@ -0,0 +1,146 @@ +package ux + +import ( + "fmt" + "strings" +) + +// BuildErrorHint describes a user-facing explanation and fix for a known build failure. +type BuildErrorHint struct { + Title string + Body string + Fixes []string +} + +// buildErrorPatterns maps output substrings to user-facing hints. +// Patterns are checked case-insensitively in order; first match wins. +var buildErrorPatterns = []struct { + needle string + hint BuildErrorHint +}{ + { + needle: "no space left on device", + hint: BuildErrorHint{ + Title: "No space left on device", + Body: "Docker ran out of disk space during the build.", + Fixes: []string{ + "docker buildx prune -af # safe — clears build cache only", + "docker image prune # run after stopping old devcell containers", + }, + }, + }, + { + needle: "failed to fetch", + hint: BuildErrorHint{ + Title: "Nix fetch failed", + Body: "A nix package could not be downloaded. This is usually a network issue.", + Fixes: []string{ + "cell build # retry — may be a transient failure", + "cell build --update # refresh flake inputs and rebuild", + }, + }, + }, + { + needle: "dial tcp", + hint: BuildErrorHint{ + Title: "Network error during build", + Body: "Docker could not reach a remote host. 
Check your internet connection.", + Fixes: []string{ + "cell build # retry after checking connectivity", + }, + }, + }, + { + needle: "error: attribute", + hint: BuildErrorHint{ + Title: "Nix attribute error", + Body: "A package attribute in your nixhome config does not exist in nixpkgs.", + Fixes: []string{ + "task nix:validate # check nix syntax and attribute names", + }, + }, + }, + { + needle: "error: undefined variable", + hint: BuildErrorHint{ + Title: "Nix undefined variable", + Body: "Your nixhome config references a variable that is not in scope.", + Fixes: []string{ + "task nix:validate # check nix syntax", + }, + }, + }, + { + needle: "dockerfile parse error", + hint: BuildErrorHint{ + Title: "Dockerfile syntax error", + Body: "The generated Dockerfile contains a syntax error.", + Fixes: []string{ + "cell build # retry after checking .devcell/Dockerfile", + }, + }, + }, + { + needle: "permission denied", + hint: BuildErrorHint{ + Title: "Permission denied", + Body: "A file or directory could not be accessed during the build.", + Fixes: []string{ + "ls -la .devcell/ # inspect build context permissions", + }, + }, + }, + { + needle: "cannot connect to the docker daemon", + hint: BuildErrorHint{ + Title: "Docker daemon is not running", + Body: "Could not connect to the Docker daemon. Docker Desktop may not be started.", + Fixes: []string{ + "open -a Docker # start Docker Desktop (macOS)", + " # or start Docker Desktop from the menu bar", + }, + }, + }, + // Fallback: generic docker build failure — must be last. + { + needle: "docker build: exit status", + hint: BuildErrorHint{ + Title: "Docker build failed", + Body: "The docker build command exited with an error. Run with --debug to see the full output.", + Fixes: []string{ + "cell build --debug # stream full build log", + }, + }, + }, +} + +// ClassifyBuildOutput scans docker build output for known error patterns and +// returns a user-facing hint. Returns nil when no pattern matches. 
+func ClassifyBuildOutput(output string) *BuildErrorHint { + lower := strings.ToLower(output) + for _, p := range buildErrorPatterns { + if strings.Contains(lower, p.needle) { + h := p.hint + return &h + } + } + return nil +} + +// PrintBuildErrorHint renders a user-facing error panel for a build failure hint. +func PrintBuildErrorHint(hint *BuildErrorHint) { + border := StyleError.Render("─────────────────────────────────────────") + fmt.Println() + fmt.Printf(" %s\n", border) + fmt.Printf(" %s %s\n", StyleError.Render("✗"), StyleBold.Render(hint.Title)) + fmt.Printf(" %s\n", StyleMuted.Render(hint.Body)) + if len(hint.Fixes) > 0 { + fmt.Println() + fmt.Printf(" %s\n", StyleBold.Render("To fix:")) + for _, fix := range hint.Fixes { + fmt.Printf(" %s %s\n", StyleAccent.Render("•"), StyleInfo.Render(fix)) + } + } + fmt.Printf(" %s\n", border) + fmt.Println() +} diff --git a/internal/ux/build_errors_test.go b/internal/ux/build_errors_test.go new file mode 100644 index 0000000..f725a02 --- /dev/null +++ b/internal/ux/build_errors_test.go @@ -0,0 +1,103 @@ +package ux + +import ( + "strings" + "testing" +) + +func TestClassifyBuildOutput_NoSpace(t *testing.T) { + output := `#8 0.128 mktemp: failed to create directory via template '/tmp/home-manager-build.XXXXXXXXXX': No space left on device +#8 ERROR: process did not complete successfully: exit code: 1` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for no-space output, got nil") + } + if !strings.Contains(hint.Title, "No space") { + t.Errorf("unexpected title: %q", hint.Title) + } + if len(hint.Fixes) == 0 { + t.Error("expected at least one fix suggestion") + } +} + +func TestClassifyBuildOutput_FetchError(t *testing.T) { + output := `error: failed to fetch https://cache.nixos.org/abc.narinfo: connection refused` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for fetch error, got nil") + } + if !strings.Contains(hint.Title, "fetch") { + 
t.Errorf("unexpected title: %q", hint.Title) + } +} + +func TestClassifyBuildOutput_NetworkError(t *testing.T) { + output := `dial tcp 1.2.3.4:443: i/o timeout` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for network error, got nil") + } + if !strings.Contains(strings.ToLower(hint.Title), "network") { + t.Errorf("unexpected title: %q", hint.Title) + } +} + +func TestClassifyBuildOutput_NixAttribute(t *testing.T) { + output := `error: attribute 'pkgs.alejnadra' missing` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for nix attribute error, got nil") + } +} + +func TestClassifyBuildOutput_NoMatch(t *testing.T) { + output := `some completely unknown build failure text` + hint := ClassifyBuildOutput(output) + if hint != nil { + t.Errorf("expected nil for unrecognized output, got %+v", hint) + } +} + +func TestClassifyBuildOutput_CaseInsensitive(t *testing.T) { + output := `NO SPACE LEFT ON DEVICE` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for uppercase pattern, got nil") + } +} + +func TestClassifyBuildOutput_DockerDaemon(t *testing.T) { + output := `ERROR: Cannot connect to the Docker daemon at unix:///Users/dmitry/.docker/run/docker.sock. Is the docker daemon running? 
+Error: docker build: exit status 1` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint for docker daemon error, got nil") + } + if !strings.Contains(strings.ToLower(hint.Title), "daemon") { + t.Errorf("unexpected title: %q", hint.Title) + } +} + +func TestClassifyBuildOutput_GenericDockerFallback(t *testing.T) { + output := `Error: docker build: exit status 1` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected fallback hint for generic docker build failure, got nil") + } + if !strings.Contains(strings.ToLower(hint.Title), "docker build failed") { + t.Errorf("unexpected title: %q", hint.Title) + } +} + +func TestClassifyBuildOutput_DockerDaemonBeforeFallback(t *testing.T) { + // daemon error must match before the generic fallback even though both patterns present + output := `Cannot connect to the Docker daemon at unix:///var/run/docker.sock +Error: docker build: exit status 1` + hint := ClassifyBuildOutput(output) + if hint == nil { + t.Fatal("expected hint, got nil") + } + if !strings.Contains(strings.ToLower(hint.Title), "daemon") { + t.Errorf("daemon pattern should win over fallback, got title: %q", hint.Title) + } +} diff --git a/internal/ux/table.go b/internal/ux/table.go new file mode 100644 index 0000000..8c5ff8a --- /dev/null +++ b/internal/ux/table.go @@ -0,0 +1,197 @@ +package ux + +import ( + "fmt" + "os" + "regexp" + + "github.com/charmbracelet/bubbles/table" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" +) + +// ansiRe strips ANSI SGR escape sequences (colors, bold, etc.) from a string. +var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*m`) + +func stripANSI(s string) string { return ansiRe.ReplaceAllString(s, "") } + +// SortKey identifies the column being sorted in the interactive table. 
+type SortKey string + +const ( + SortRecommended SortKey = "r" + SortSWE SortKey = "s" + SortSpeed SortKey = "z" + SortSize SortKey = "p" +) + +// SortKeyString converts a SortKey to the string value passed to RankModels sortBy. +func SortKeyString(key SortKey) string { + switch key { + case SortSWE: + return "swe" + case SortSpeed: + return "speed" + case SortSize: + return "size" + default: + return "recommended" + } +} + +// InteractiveTable displays headers+rows in an interactive bubbles/table TUI. +// sortHandler is called when the user presses a sort key (r/s/z/p) and should +// return the re-sorted rows. Falls back to PrintTable for non-TTY or non-text mode. +func InteractiveTable( + headers []string, + rows [][]string, + sortHandler func(key SortKey) [][]string, +) { + if !isTTY() || OutputFormat != "text" { + PrintTable(headers, rows) + return + } + + m := &tableModel{ + headers: headers, + rows: rows, + sortHandler: sortHandler, + currentSort: SortRecommended, + } + m.rebuild() + + p := tea.NewProgram(m) + if _, err := p.Run(); err != nil { + PrintTable(headers, rows) + } +} + +type tableModel struct { + t table.Model + headers []string + rows [][]string + sortHandler func(key SortKey) [][]string + currentSort SortKey + termHeight int // set by tea.WindowSizeMsg; used to cap visible rows +} + +var baseTableStyle = lipgloss.NewStyle(). + BorderStyle(lipgloss.NormalBorder()). + BorderForeground(colorBorder) + +func (m *tableModel) rebuild() { + cols := make([]table.Column, len(m.headers)) + for i, h := range m.headers { + w := lipgloss.Width(h) + 2 + for _, row := range m.rows { + if i < len(row) { + if vw := lipgloss.Width(row[i]) + 2; vw > w { + w = vw + } + } + } + if w > 40 { + w = 40 + } + cols[i] = table.Column{Title: h, Width: w} + } + + // Strip ANSI codes before passing cells to bubbles/table. + // Pre-styled cells (modGray.Render etc.) corrupt the table's internal + // rendering — columns shift, content gets clipped. 
Plain text only here; + // PrintTable uses the styled version for non-interactive output. + trows := make([]table.Row, len(m.rows)) + for i, r := range m.rows { + plain := make([]string, len(r)) + for j, cell := range r { + plain[j] = stripANSI(cell) + } + trows[i] = table.Row(plain) + } + + // Cap viewport height so the full view (blank+border+header+rows+border+help) + // fits within the terminal. Total overhead = 7 lines (1 blank, 2 outer border, + // 2 header+separator, 1 blank, 1 help). Default termHeight=24 until first + // tea.WindowSizeMsg arrives. + termH := m.termHeight + if termH <= 0 { + termH = 24 + } + maxRows := termH - 7 + if maxRows < 3 { + maxRows = 3 + } + height := len(m.rows) + if height > maxRows { + height = maxRows + } + + t := table.New( + table.WithColumns(cols), + table.WithRows(trows), + table.WithFocused(true), + table.WithHeight(height), + ) + s := table.DefaultStyles() + s.Header = s.Header. + BorderStyle(lipgloss.NormalBorder()). + BorderForeground(colorBorder). + BorderBottom(true). + Bold(true). + Foreground(lipgloss.AdaptiveColor{Light: "#24292f", Dark: "#cdd9e5"}) + s.Selected = s.Selected. + Foreground(lipgloss.Color("#E85D26")). 
+ Bold(true) + t.SetStyles(s) + m.t = t +} + +func (m *tableModel) Init() tea.Cmd { return nil } + +func (m *tableModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case tea.WindowSizeMsg: + m.termHeight = msg.Height + m.rebuild() + return m, nil + case tea.KeyMsg: + switch msg.String() { + case "q", "ctrl+c", "esc": + return m, tea.Quit + case "r", "s", "z", "p": + key := SortKey(msg.String()) + if m.sortHandler != nil { + m.rows = m.sortHandler(key) + m.currentSort = key + m.rebuild() + } + return m, nil + } + } + var cmd tea.Cmd + m.t, cmd = m.t.Update(msg) + return m, cmd +} + +func (m *tableModel) View() string { + sortLabel := map[SortKey]string{ + SortRecommended: "Recommended", + SortSWE: "SWE-bench", + SortSpeed: "Speed", + SortSize: "Size", + }[m.currentSort] + + help := StyleMuted.Render(fmt.Sprintf( + " Sort: [r]ecommended [s]we-bench [z]speed [p]size • active: %s • [q]uit", + sortLabel, + )) + return "\n" + baseTableStyle.Render(m.t.View()) + "\n" + help + "\n" +} + +func isTTY() bool { + fi, err := os.Stdout.Stat() + if err != nil { + return false + } + return fi.Mode()&os.ModeCharDevice != 0 +} diff --git a/nixhome/flake.nix b/nixhome/flake.nix index 2f8225d..7a28350 100644 --- a/nixhome/flake.nix +++ b/nixhome/flake.nix @@ -35,8 +35,11 @@ mkHome = system: modules: let nixCfg = { inherit system; - config.allowUnfreePredicate = pkg: - builtins.elem (lib.getName pkg) ["claude-code" "corefonts" "drawio" "packer" "terraform"]; + # allowUnfree covers: claude-code, corefonts, drawio, packer, terraform, + # and Android SDK bundles (platform-tools, build-tools, emulator, etc. — + # too many sub-derivation names to enumerate in a predicate). + config.allowUnfree = true; + config.android_sdk.accept_license = true; }; pkgsUnstable = import nixpkgs-unstable nixCfg; pkgsEdge = import nixpkgs-edge nixCfg; @@ -81,6 +84,45 @@ ) {} stacks; + + # Vagrant VM configs — same stacks but for the 'vagrant' user at /home/vagrant. 
+ # Applied by the Vagrantfile provisioner: + # home-manager switch --flake .#vagrant-ultimate-aarch64 + vagrantUser = {username = "vagrant"; homeDirectory = "/home/vagrant";}; + mkVagrantHome = system: modules: let + nixCfg = { + inherit system; + config.allowUnfree = true; + config.android_sdk.accept_license = true; + }; + pkgsUnstable = import nixpkgs-unstable nixCfg; + pkgsEdge = import nixpkgs-edge nixCfg; + in + home-manager.lib.homeManagerConfiguration { + pkgs = import nixpkgs nixCfg; + extraSpecialArgs = {inherit mcp-nixos pkgsUnstable pkgsEdge;}; + modules = + [ + { + home.stateVersion = "25.11"; + home.username = vagrantUser.username; + home.homeDirectory = vagrantUser.homeDirectory; + } + ] + ++ modules; + }; + mkAllVagrantConfigs = + lib.foldlAttrs + ( + acc: name: mods: let + shortName = lib.removePrefix "devcell-" name; + in + acc + // {"vagrant-${shortName}" = mkVagrantHome "x86_64-linux" mods;} + // {"vagrant-${shortName}-aarch64" = mkVagrantHome "aarch64-linux" mods;} + ) + {} + stacks; in { # Expose building blocks so user wrapper flakes can compose custom stacks: # devcell.lib.mkHome "x86_64-linux" [ devcell.stacks.go ] @@ -92,6 +134,7 @@ # Individual modules for composing custom stacks in user wrapper flakes: # devcell.lib.mkHome "x86_64-linux" (devcell.stacks.go ++ devcell.modules.electronics) modules = { + android = [./modules/android.nix]; apple = [./modules/apple.nix]; base = [./modules/base.nix]; build = [./modules/build.nix]; @@ -106,15 +149,17 @@ news = [./modules/news.nix]; nixos = [./modules/nixos.nix]; node = [./modules/node.nix]; + postgresql = [./modules/postgresql.nix]; project-management = [./modules/project-management.nix]; python = [./modules/python.nix]; qa-tools = [./modules/qa-tools.nix]; scraping = [./modules/scraping]; + security = [./modules/security.nix]; shell = [./modules/shell.nix]; travel = [./modules/travel.nix]; }; - homeConfigurations = mkAllConfigs; + homeConfigurations = mkAllConfigs // mkAllVagrantConfigs; # 
macOS VM (Vagrant/UTM) — applied via: darwin-rebuild switch --flake .#macOS darwinConfigurations.macOS = nix-darwin.lib.darwinSystem { diff --git a/nixhome/hosts/linux/home.nix b/nixhome/hosts/linux/home.nix new file mode 100644 index 0000000..c5b62d4 --- /dev/null +++ b/nixhome/hosts/linux/home.nix @@ -0,0 +1,16 @@ +# hosts/linux/home.nix — home-manager config for the vagrant user on the Linux VM. +# Essential vagrant modules (shell, LLM tools, mise) are always included. +# Stack + extra modules are controlled by ./stack.nix, which is regenerated +# from .devcell.toml by `cell claude --engine=vagrant` / `cell build --engine=vagrant`. +{ mcp-nixos, ... }: { + imports = [ + ./stack.nix # Generated by cell — stack + extra modules from .devcell.toml + ../../modules/shell.nix + ../../modules/llm + ../../modules/mise.nix + ]; + + home.username = "vagrant"; + home.homeDirectory = "/home/vagrant"; + home.stateVersion = "25.11"; +} diff --git a/nixhome/hosts/linux/stack.nix b/nixhome/hosts/linux/stack.nix new file mode 100644 index 0000000..bc2038f --- /dev/null +++ b/nixhome/hosts/linux/stack.nix @@ -0,0 +1,6 @@ +# Generated by cell — do not edit. Stack: ultimate +{ ... }: { + imports = [ + ../../stacks/ultimate.nix + ]; +} diff --git a/nixhome/modules/android.nix b/nixhome/modules/android.nix new file mode 100644 index 0000000..0c45ad8 --- /dev/null +++ b/nixhome/modules/android.nix @@ -0,0 +1,50 @@ +# android.nix — Android SDK and development tools +# +# Provides: Android SDK, ADB, build tools, apktool, jadx +# +# NOTE on platform support: Android SDK packages (aapt, build-tools, emulator) +# are x86_64-linux only in nixpkgs — they are marked badPlatforms for aarch64-linux. +# On aarch64-linux (Apple Silicon Docker, ARM servers) this module is a no-op. +# Use a physical device + ADB over USB, or a cloud emulator (Firebase Test Lab). +# +# NOTE on emulator: Running the Android emulator requires KVM (/dev/kvm). +# On Linux hosts, pass --device /dev/kvm to docker run. 
+{pkgs, lib, ...}: let + isX86Linux = pkgs.stdenv.hostPlatform.system == "x86_64-linux"; + + # Android SDK composition via androidenv. + # System images are NOT included — download via sdkmanager after first run: + # sdkmanager "system-images;android-35;google_apis;x86_64" + # avdmanager create avd -n pixel9 -k "system-images;android-35;google_apis;x86_64" -d pixel_9 + androidSdk = pkgs.androidenv.composeAndroidPackages { + platformToolsVersion = "35.0.2"; + buildToolsVersions = ["35.0.0"]; + platformVersions = ["35"]; + includeEmulator = true; + emulatorVersion = "35.3.12"; + includeSystemImages = false; + useGoogleAPIs = true; + useGoogleTVAddOns = false; + extraLicenses = [ + "android-sdk-license" + "android-sdk-preview-license" + "google-gdk-license" + ]; + }; +in { + home.packages = + [ pkgs.android-tools ] # adb + fastboot, compiled from source (all platforms) + ++ lib.optionals isX86Linux [ + androidSdk.androidsdk # full SDK + build-tools + emulator (x86_64 only) + pkgs.apktool # APK decompile/recompile (reverse engineering / QA) + pkgs.jadx # DEX/APK decompiler to readable Java/Kotlin + ]; + + # ANDROID_HOME is the canonical SDK root; ANDROID_SDK_ROOT is the legacy alias. + # Both are needed because different tools check different vars. + # Only set on x86_64-linux where the SDK is actually installed. + home.sessionVariables = lib.mkIf isX86Linux { + ANDROID_HOME = "${androidSdk.androidsdk}/libexec/android-sdk"; + ANDROID_SDK_ROOT = "${androidSdk.androidsdk}/libexec/android-sdk"; + }; +} diff --git a/nixhome/modules/electronics.nix b/nixhome/modules/electronics.nix index 29f8f90..a546a1c 100644 --- a/nixhome/modules/electronics.nix +++ b/nixhome/modules/electronics.nix @@ -64,6 +64,7 @@ in { ngspice # SPICE simulation (libngspice0 + ngspice CLI) libspnav # 3D mouse / space navigator support esphome # ESP32 framework for home automation + platformio # embedded development platform (Arduino, ESP32, etc.) 
wokwi-cli # Wokwi hardware simulator CLI (v0.26.0 static binary) kicadMcp # KiCad MCP server for Claude ] diff --git a/nixhome/modules/scraping/default.nix b/nixhome/modules/scraping/default.nix index 9e23384..4ef1db0 100644 --- a/nixhome/modules/scraping/default.nix +++ b/nixhome/modules/scraping/default.nix @@ -248,6 +248,52 @@ async function __hmMove(page, tx, ty) {\ }); } + // Spoof platform + userAgentData from cell login fingerprint. + // window.__cellFp is injected by the patchright-mcp-cell preamble init script + // when $HOME/playwright-fingerprint.json exists (written by `cell login` on macOS). + if (window.__cellFp) { + // navigator.platform → "MacIntel" + Object.defineProperty(Navigator.prototype, 'platform', { + get: () => window.__cellFp.platform || 'MacIntel', + configurable: true + }); + + if (typeof NavigatorUAData !== 'undefined') { + // navigator.userAgentData.platform → "macOS" + const _fpPlatformDesc = Object.getOwnPropertyDescriptor(NavigatorUAData.prototype, 'platform'); + if (_fpPlatformDesc) { + Object.defineProperty(NavigatorUAData.prototype, 'platform', { + get: () => window.__cellFp.uaPlatform || 'macOS', + configurable: true + }); + } + + // navigator.userAgentData.brands → Chrome brands + const _fpBrandsDesc = Object.getOwnPropertyDescriptor(NavigatorUAData.prototype, 'brands'); + if (_fpBrandsDesc && window.__cellFp.brands) { + Object.defineProperty(NavigatorUAData.prototype, 'brands', { + get: () => window.__cellFp.brands, + configurable: true + }); + } + + // Extend existing getHighEntropyValues to also return macOS platform + brands + const _fpOrigGetHigh = NavigatorUAData.prototype.getHighEntropyValues; + if (_fpOrigGetHigh) { + Object.defineProperty(NavigatorUAData.prototype, 'getHighEntropyValues', { + value: async function(hints) { + const values = await _fpOrigGetHigh.call(this, hints); + if (window.__cellFp.uaPlatform) values.platform = window.__cellFp.uaPlatform; + if (window.__cellFp.brands) values.brands = 
window.__cellFp.brands; + return values; + }, + writable: true, + configurable: true + }); + } + } + } + // --- Web Share API stubs (noWebShare signal) --- if (!navigator.share) { navigator.share = function(data) { @@ -669,13 +715,219 @@ async function __hmMove(page, tx, ty) {\ _nativeFnNames.set(window.ContactsManager, 'ContactsManager'); } - // --- Fix noDownlinkMax: mock NetworkInformation.downlinkMax --- + // --- NetworkInformation — realistic 4G WiFi connection profile --- if (navigator.connection) { - Object.defineProperty(navigator.connection, 'downlinkMax', { - get: () => Infinity, - configurable: true - }); + var _connProps = { + effectiveType: '4g', + downlink: 10.5, + downlinkMax: Infinity, + rtt: 50, + saveData: false, + type: 'wifi', + }; + for (var _cp in _connProps) { + (function(k, v) { + Object.defineProperty(navigator.connection, k, { + get: function() { return v; }, configurable: true + }); + })(_cp, _connProps[_cp]); + } + } + + // --- hardwareConcurrency: container CPU count leaks as bot signal --- + // Spoof to 8 (common mid-range laptop value). 
+ Object.defineProperty(navigator, 'hardwareConcurrency', { + get: () => 8, configurable: true + }); + + // --- speechSynthesis.getVoices() returns [] on headless — bot signal --- + if (window.speechSynthesis) { + var _fakeVoices = [ + { voiceURI: 'Google US English', name: 'Google US English', lang: 'en-US', localService: false, default: true }, + { voiceURI: 'Google UK English Female', name: 'Google UK English Female', lang: 'en-GB', localService: false, default: false }, + { voiceURI: 'Google UK English Male', name: 'Google UK English Male', lang: 'en-GB', localService: false, default: false }, + { voiceURI: 'Google Deutsch', name: 'Google Deutsch', lang: 'de-DE', localService: false, default: false }, + { voiceURI: 'Google español', name: 'Google español', lang: 'es-ES', localService: false, default: false }, + { voiceURI: 'Google français', name: 'Google français', lang: 'fr-FR', localService: false, default: false }, + ].map(function(v) { return Object.assign(Object.create(SpeechSynthesisVoice.prototype), v); }); + var _origGV = window.speechSynthesis.getVoices.bind(window.speechSynthesis); + window.speechSynthesis.getVoices = function() { + var real = _origGV(); + return real.length > 0 ? real : _fakeVoices; + }; + _nativeFnNames.set(window.speechSynthesis.getVoices, 'getVoices'); } + + // --- Battery API spoof (charging:true + level:1.0 = classic VM signal) --- + // Real laptop: discharging, ~70% level, ~2h remaining. + // Use fixed-but-plausible values so repeated calls return the same object. 
+ (function() { + var _level = 0.67 + (Math.floor(Date.now() / 86400000) % 20) / 100; + var _dtime = 6300 + (Math.floor(Date.now() / 3600000) % 60) * 60; + var _battery = { + charging: false, + chargingTime: Infinity, + dischargingTime: _dtime, + level: _level, + addEventListener: function() {}, + removeEventListener: function() {}, + dispatchEvent: function() { return false; }, + }; + Object.defineProperty(navigator, 'getBattery', { + value: function() { return Promise.resolve(_battery); }, + configurable: true, writable: true + }); + _nativeFnNames.set(navigator.getBattery, 'getBattery'); + })(); + + // --- Canvas noise (identical hash across all 5 contexts = cluster signal) --- + // Inject subtle per-session noise into canvas readback. Noise is stable + // within a session (same seed) but unique per browser launch. + // Only touches the alpha-zero (transparent) pixels to avoid visible artifacts. + (function() { + var _ns = (Math.random() * 0x7FFFFFFF) | 0; + function _nb(i) { + var x = (_ns ^ (i * 1664525 + 1013904223)) & 0xFF; + return (x & 1); + } + var _origTDU = HTMLCanvasElement.prototype.toDataURL; + HTMLCanvasElement.prototype.toDataURL = function(type, q) { + var ctx = this.getContext && this.getContext('2d'); + if (ctx && this.width > 0 && this.height > 0) { + var id = ctx.getImageData(0, 0, this.width, this.height); + var d = id.data; + for (var i = 0; i < d.length; i += 4) { + if (d[i+3] > 0) { d[i] = Math.max(0, d[i] - _nb(i)); } + } + ctx.putImageData(id, 0, 0); + } + return _origTDU.call(this, type, q); + }; + _nativeFnNames.set(HTMLCanvasElement.prototype.toDataURL, 'toDataURL'); + + var _origGID = CanvasRenderingContext2D.prototype.getImageData; + CanvasRenderingContext2D.prototype.getImageData = function(sx, sy, sw, sh) { + var id = _origGID.call(this, sx, sy, sw, sh); + var d = id.data; + for (var i = 0; i < d.length; i += 4) { + if (d[i+3] > 0) { d[i] = Math.max(0, d[i] - _nb(i)); } + } + return id; + }; + 
_nativeFnNames.set(CanvasRenderingContext2D.prototype.getImageData, 'getImageData'); + })(); + + // --- Audio fingerprint noise (OfflineAudioContext oscillator hash) --- + // Fingerprinting reads AudioBuffer.getChannelData() after rendering an oscillator. + // Add tiny per-session float noise (< 1e-7) — inaudible, changes the hash. + // Patch both AudioBuffer (OfflineAudioContext result) and AnalyserNode readbacks. + (function() { + var _as = (Math.random() * 0x7FFFFFFF) | 0; + function _af(i) { + var x = (_as ^ (i * 22695477 + 1)) >>> 0; + return (x & 0xFF) * 1e-9 - 1.275e-7; + } + + // AudioBuffer.getChannelData — main audio fingerprint vector + if (typeof AudioBuffer !== 'undefined') { + var _origGCD = AudioBuffer.prototype.getChannelData; + AudioBuffer.prototype.getChannelData = function(ch) { + var data = _origGCD.call(this, ch); + for (var i = 0; i < data.length; i++) { + data[i] = Math.max(-1, Math.min(1, data[i] + _af(i))); + } + return data; + }; + _nativeFnNames.set(AudioBuffer.prototype.getChannelData, 'getChannelData'); + } + + // AnalyserNode.getFloatFrequencyData + getByteFrequencyData + if (typeof AnalyserNode !== 'undefined') { + var _origGFFD = AnalyserNode.prototype.getFloatFrequencyData; + AnalyserNode.prototype.getFloatFrequencyData = function(arr) { + _origGFFD.call(this, arr); + for (var i = 0; i < arr.length; i++) arr[i] += _af(i) * 1e4; + }; + _nativeFnNames.set(AnalyserNode.prototype.getFloatFrequencyData, 'getFloatFrequencyData'); + + var _origGBFD = AnalyserNode.prototype.getByteFrequencyData; + AnalyserNode.prototype.getByteFrequencyData = function(arr) { + _origGBFD.call(this, arr); + for (var i = 0; i < arr.length; i++) { + arr[i] = Math.max(0, Math.min(255, arr[i] + (_af(i) > 0 ? 
1 : 0))); + } + }; + _nativeFnNames.set(AnalyserNode.prototype.getByteFrequencyData, 'getByteFrequencyData'); + } + })(); + + // --- Font enumeration spoof (Windows/macOS fonts absent on Linux = fingerprint) --- + // Detection: measure text width with "Calibri,sans-serif" vs "sans-serif" baseline. + // If equal → font absent. We apply per-font width factors so probed fonts read + // as present. Factors are approximate ratio of real font width to sans-serif fallback. + // Also patched on OffscreenCanvas (used by CreepJS and similar scanners). + (function() { + var _fonts = { + 'calibri': 0.880, + 'calibri light': 0.835, + 'cambria': 0.982, + 'cambria math': 0.982, + 'consolas': 0.930, + 'constantia': 0.983, + 'corbel': 0.928, + 'franklin gothic medium': 0.914, + 'segoe ui': 0.938, + 'segoe ui light': 0.894, + 'segoe ui semibold': 0.948, + 'palatino linotype': 1.025, + 'book antiqua': 1.010, + 'garamond': 0.856, + 'ms sans serif': 0.946, + 'ms serif': 1.015, + 'helvetica neue': 0.938, + 'lucida grande': 0.971, + 'lucida console': 0.886, + 'optima': 0.942, + 'gill sans': 0.918, + 'apple sd gothic neo': 0.910, + 'apple chancery': 1.030, + 'monaco': 0.893, + 'menlo': 0.901, + 'andale mono': 0.878, + }; + + function _matchFont(fontStr) { + var lower = (fontStr || '''').toLowerCase(); + for (var name in _fonts) { + if (lower.indexOf(name) !== -1) return _fonts[name]; + } + return null; + } + + function _patchMeasureText(Proto) { + if (!Proto || !Proto.prototype || !Proto.prototype.measureText) return; + var _orig = Proto.prototype.measureText; + Proto.prototype.measureText = function(text) { + var m = _orig.call(this, text); + var factor = _matchFont(this.font); + if (factor === null) return m; + var w = m.width * factor; + return new Proxy(m, { + get: function(t, p) { + if (p === 'width') return w; + var v = t[p]; + return typeof v === 'function' ? 
v.bind(t) : v; + } + }); + }; + _nativeFnNames.set(Proto.prototype.measureText, 'measureText'); + } + + _patchMeasureText(CanvasRenderingContext2D); + if (typeof OffscreenCanvasRenderingContext2D !== 'undefined') { + _patchMeasureText(OffscreenCanvasRenderingContext2D); + } + })(); ''; }; @@ -686,14 +938,28 @@ async function __hmMove(page, tx, ty) {\ name = "keep-alive-init.js"; text = '' (function() { - let _kaTimer = null; + var _kaTimer = null; + var _KA_IDLE_MS = 240000; // 4 min — refresh before 5-min JWT TTLs expire + function _kaTick() { + // Silent favicon fetch: sends cookies to the server so it can + // see authenticated activity without reloading the page. + // Uses GET (not HEAD) — many CDN/WAF configs (e.g. Akamai) return + // 503 for HEAD requests. Falls back to a GET on the current URL + // if the favicon CDN is cross-origin (won't carry auth cookies). + var _faviconEl = document.querySelector('link[rel~="icon"]'); + var _faviconUrl = _faviconEl ? _faviconEl.href : null; + var _sameOrigin = _faviconUrl && _faviconUrl.startsWith(location.origin); + var _fetchUrl = _sameOrigin ? _faviconUrl : location.href; + fetch(_fetchUrl, { method: 'GET', credentials: 'include', cache: 'no-store' }) + .catch(function() {}); + // Subtle scroll jitter so the page registers user-like activity. 
+ window.scrollBy(0, 10); + setTimeout(function() { window.scrollBy(0, -10); }, 500); + _kaTimer = setTimeout(_kaTick, _KA_IDLE_MS); + } function _kaReset() { if (_kaTimer) clearTimeout(_kaTimer); - _kaTimer = setTimeout(function _kaTick() { - window.scrollBy(0, 10); - setTimeout(function() { window.scrollBy(0, -10); }, 500); - _kaTimer = setTimeout(_kaTick, 60000); - }, 60000); + _kaTimer = setTimeout(_kaTick, _KA_IDLE_MS); } ['scroll', 'click', 'keydown', 'mousemove'].forEach(function(evt) { window.addEventListener(evt, _kaReset, { passive: true }); @@ -752,6 +1018,76 @@ async function __hmMove(page, tx, ty) {\ _EXTRA_ARGS+=(--config "$_SHARE/config.json") fi + # Inject macOS fingerprint from $HOME/playwright-fingerprint.json if present. + # Merges userAgent into the runtime config and prepends a preamble init script + # that sets window.__cellFp before stealth-init.js runs. + _FP_FILE="$HOME/playwright-fingerprint.json" + if [ -f "$_FP_FILE" ]; then + _UA=$(${pkgs.jq}/bin/jq -r '.userAgent // empty' "$_FP_FILE") + if [ -n "$_UA" ]; then + # Determine base config for merge: prefer already-generated _RUNTIME_CONFIG + # if it has content, otherwise fall back to the static share config. + _FP_CONFIG=$(mktemp /tmp/pw-fp-config-XXXXXX.json) + # Build userAgentMetadata from fingerprint fields so CDP overrides the actual + # sec-ch-ua-* HTTP request headers (not just JS-visible navigator.userAgentData). + # uaPlatform ("macOS") maps to the Client Hints platform string. + # version is the full browser version for sec-ch-ua-full-version. + # brands are used for sec-ch-ua header value. 
+ _BASE_CONFIG="$_SHARE/config.json" + [ -s "$_RUNTIME_CONFIG" ] && _BASE_CONFIG="$_RUNTIME_CONFIG" + ${pkgs.jq}/bin/jq -n \ + --arg ua "$_UA" \ + --slurpfile fp "$_FP_FILE" \ + --slurpfile cfg "$_BASE_CONFIG" \ + '($cfg[0]) as $cfg | ($fp[0]) as $fp | + ($fp.uaPlatform // "macOS") as $platform | + ($fp.version // "") as $ver | + ($fp.brands // []) as $brands | + $cfg + | .browser.contextOptions.userAgent = $ua + | .browser.contextOptions.userAgentMetadata = { + "platform": $platform, + "platformVersion": "", + "architecture": "x86_64", + "model": "", + "mobile": false, + "brands": (if ($brands | length) > 0 then $brands else [ + {"brand": "Google Chrome", "version": "146"}, + {"brand": "Chromium", "version": "146"}, + {"brand": "Not/A)Brand", "version": "8"} + ] end), + "fullVersionList": (if ($brands | length) > 0 then $brands else [ + {"brand": "Google Chrome", "version": ($ver // "146.0.0.0")}, + {"brand": "Chromium", "version": ($ver // "146.0.0.0")}, + {"brand": "Not/A)Brand", "version": "8.0.0.0"} + ] end) + } + | .browser.launchOptions.args = ((.browser.launchOptions.args // []) + ["--user-agent=" + $ua]) + ' > "$_FP_CONFIG" + + # Replace any existing --config in _EXTRA_ARGS with the merged config. + _NEW_EXTRA_ARGS=() + _skip_next=false + for _arg in "''${_EXTRA_ARGS[@]}"; do + if $_skip_next; then _skip_next=false; continue; fi + if [ "$_arg" = "--config" ]; then _skip_next=true; continue; fi + _NEW_EXTRA_ARGS+=("$_arg") + done + _EXTRA_ARGS=("''${_NEW_EXTRA_ARGS[@]}") + _EXTRA_ARGS+=(--config "$_FP_CONFIG") + + # Write preamble init script: window.__cellFp = ; + _FP_PREAMBLE=$(mktemp /tmp/pw-fp-preamble-XXXXXX.js) + printf 'window.__cellFp = ' > "$_FP_PREAMBLE" + ${pkgs.jq}/bin/jq '.' "$_FP_FILE" >> "$_FP_PREAMBLE" + printf ';\n' >> "$_FP_PREAMBLE" + + # Prepend preamble BEFORE stealth-init.js so it is available to all init scripts. 
+ _EXTRA_ARGS+=(--init-script "$_FP_PREAMBLE") + trap 'rm -f "$_RUNTIME_CONFIG" "$SECRETS_FILE" "$_FP_CONFIG" "$_FP_PREAMBLE"' EXIT + fi + fi + [ -f "$_SHARE/stealth-init.js" ] && _EXTRA_ARGS+=(--init-script "$_SHARE/stealth-init.js") [ -f "$_SHARE/keep-alive-init.js" ] && _EXTRA_ARGS+=(--init-script "$_SHARE/keep-alive-init.js") diff --git a/nixhome/modules/security.nix b/nixhome/modules/security.nix index c193c7f..2a92e72 100644 --- a/nixhome/modules/security.nix +++ b/nixhome/modules/security.nix @@ -85,6 +85,10 @@ in { wafw00f # WAF fingerprinting (use: wafw00f https://target.com) nmap # port scanner + NSE vuln scripts (use: nmap -sV --script=vuln target.com) + # mobile app analysis + apkeep # APK downloader from Google Play / APKPure (use: apkeep -a com.example.app .) + jadx # APK/DEX decompiler → readable Java source (use: jadx app.apk -d out/) + # parameter discovery arjun # HTTP parameter discovery (use: arjun -u https://target.com/endpoint) diff --git a/nixhome/stacks/ultimate.nix b/nixhome/stacks/ultimate.nix index 3171d0b..4f1305f 100644 --- a/nixhome/stacks/ultimate.nix +++ b/nixhome/stacks/ultimate.nix @@ -1,6 +1,7 @@ { imports = [ ./fullstack.nix + ../modules/android.nix ../modules/desktop ../modules/electronics.nix ../modules/financial.nix diff --git a/test/testdata/openrouter_models.json b/test/testdata/openrouter_models.json new file mode 100644 index 0000000..bd74388 --- /dev/null +++ b/test/testdata/openrouter_models.json @@ -0,0 +1,69 @@ +{ + "data": [ + { + "id": "anthropic/claude-opus-4-5", + "name": "Claude Opus 4.5", + "pricing": { "prompt": "0.000015", "completion": "0.000075" } + }, + { + "id": "anthropic/claude-opus-4", + "name": "Claude Opus 4", + "pricing": { "prompt": "0.000015", "completion": "0.000075" } + }, + { + "id": "anthropic/claude-sonnet-4-5", + "name": "Claude Sonnet 4.5", + "pricing": { "prompt": "0.000003", "completion": "0.000015" } + }, + { + "id": "openai/gpt-4o", + "name": "GPT-4o", + "pricing": { "prompt": 
"0.0000025", "completion": "0.000010" } + }, + { + "id": "openai/gpt-4o-mini", + "name": "GPT-4o Mini", + "pricing": { "prompt": "0.00000015", "completion": "0.0000006" } + }, + { + "id": "google/gemini-2-5-pro", + "name": "Gemini 2.5 Pro", + "pricing": { "prompt": "0.00000125", "completion": "0.000010" } + }, + { + "id": "google/gemini-2-5-flash", + "name": "Gemini 2.5 Flash", + "pricing": { "prompt": "0.0000001", "completion": "0.0000004" } + }, + { + "id": "google/gemini-2-0-flash", + "name": "Gemini 2.0 Flash", + "pricing": { "prompt": "0.0000001", "completion": "0.0000004" } + }, + { + "id": "deepseek/deepseek-r1", + "name": "DeepSeek R1", + "pricing": { "prompt": "0.0000005", "completion": "0.0000022" } + }, + { + "id": "deepseek/deepseek-r1-0528", + "name": "DeepSeek R1 0528", + "pricing": { "prompt": "0.0000005", "completion": "0.0000022" } + }, + { + "id": "qwen/qwen-2-5-coder-32b-instruct", + "name": "Qwen2.5 Coder 32B Instruct", + "pricing": { "prompt": "0.000000069", "completion": "0.000000069" } + }, + { + "id": "meta-llama/llama-3-1-405b-instruct", + "name": "Llama 3.1 405B Instruct", + "pricing": { "prompt": "0.0000027", "completion": "0.0000027" } + }, + { + "id": "mistralai/devstral", + "name": "Devstral", + "pricing": { "prompt": "0.0000003", "completion": "0.0000009" } + } + ] +} diff --git a/test/testdata/swebench_leaderboards.json b/test/testdata/swebench_leaderboards.json new file mode 100644 index 0000000..ddfa26b --- /dev/null +++ b/test/testdata/swebench_leaderboards.json @@ -0,0 +1,69 @@ +{ + "leaderboards": [ + { + "name": "SWE-bench Verified", + "results": [ + { + "name": "Claude Opus 4.5 + SWE-agent", + "resolved": 72.5, + "os_model": false, + "tags": ["Model: claude-opus-4-5", "Org: Anthropic"] + }, + { + "name": "Claude Opus 4 + SWE-agent", + "resolved": 65.0, + "os_model": false, + "tags": ["Model: claude-opus-4", "Org: Anthropic"] + }, + { + "name": "Claude Sonnet 4.5 + SWE-agent", + "resolved": 62.3, + "os_model": false, + "tags": 
["Model: claude-sonnet-4-5", "Org: Anthropic"] + }, + { + "name": "GPT-4o + SWE-agent", + "resolved": 57.4, + "os_model": false, + "tags": ["Model: gpt-4o", "Org: OpenAI"] + }, + { + "name": "Gemini 2.5 Pro + SWE-agent", + "resolved": 63.8, + "os_model": false, + "tags": ["Model: gemini-2-5-pro", "Org: Google"] + }, + { + "name": "DeepSeek R1 + SWE-agent", + "resolved": 49.2, + "os_model": true, + "tags": ["Model: https://huggingface.co/deepseek-ai/DeepSeek-R1", "Org: DeepSeek"] + }, + { + "name": "DeepSeek R1 0528 + SWE-agent", + "resolved": 57.6, + "os_model": true, + "tags": ["Model: https://huggingface.co/deepseek-ai/DeepSeek-R1-0528", "Org: DeepSeek"] + }, + { + "name": "Qwen2.5 Coder 32B + SWE-agent", + "resolved": 35.0, + "os_model": true, + "tags": ["Model: https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct", "Org: Qwen"] + }, + { + "name": "Llama 3.1 405B + SWE-agent", + "resolved": 33.4, + "os_model": true, + "tags": ["Model: https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct", "Org: Meta"] + }, + { + "name": "Devstral + SWE-agent", + "resolved": 46.8, + "os_model": true, + "tags": ["Model: https://huggingface.co/mistralai/Devstral-Small-2505", "Org: Mistral"] + } + ] + } + ] +} From 6b4a251b5da43049bb29da9a038dc8d6aae348da Mon Sep 17 00:00:00 2001 From: Dmitry Kireev Date: Tue, 21 Apr 2026 05:58:05 +0000 Subject: [PATCH 4/4] Update website and README: vagrant engine, zero-password login, android module, model ranking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docs(web/index): add Vagrant to infra tools list and logo grid, add Android & Mobile category, add PlatformIO to electronics — website reflects current image contents - docs(web/FeatureCards): update stealth browser card to explain zero-password cell login and anti-bot two-phase flow — users understand how authenticated sessions work without exposing passwords - docs(web/FeatureCards): add "Docker or VM — your choice" card for vagrant 
engine / --macos flag — Apple Silicon and no-Docker-Desktop use case is now discoverable on the homepage - docs(web/index): add FAQ entries for vagrant engine, cell login anti-bot flow, 1Password integration, and cell models cloud ranking — answers the four most common new questions - docs(web/StackTable): fix registry URL from ghcr.io/dimmkirr to public.ecr.aws/w1l3v2k8/devcell — no user-facing impact - docs(web/logos): add vagrant.svg brand mark — no user-facing impact - docs(README): add Vagrant engine section with --macos quickstart commands and devcell.toml snippet — users can find the VM engine without reading source - docs(README): add "Browser login & anti-bot protection" section documenting two-phase cell login flow — zero-password session sync is now documented - docs(README): add add-on modules table (android, desktop, scraping, infra) — module system is documented for the first time - docs(README): update "what you get" bullets to cover zero-password login, 1Password detail, vagrant engine, and cloud model ranking — summary matches current feature set - docs(README): add PlatformIO to electronics stack row and fix registry URL — no user-facing impact --- README.md | 57 ++++++++++++++++++++++++--- web/public/logos/vagrant.svg | 5 +++ web/src/components/FeatureCards.astro | 9 ++++- web/src/components/StackTable.astro | 2 +- web/src/pages/index.astro | 18 +++++++-- 5 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 web/public/logos/vagrant.svg diff --git a/README.md b/README.md index 438917b..ec5a380 100644 --- a/README.md +++ b/README.md @@ -21,15 +21,16 @@ On first run, `cell` creates `.devcell.toml` and `.devcell/` in your project dir - **Isolated sandbox** - agents edit freely inside your project; your host system is untouched - **12+ MCP servers** - Yahoo Finance, Google Maps, Linear, KiCad, Inkscape, and more. 
Backing tools ship in the image alongside their servers
 - **Claude Max/Pro support** - runs Claude Code directly, no API key or proxy needed
-- **Stealth Chromium** - anti-fingerprint browser with Playwright, passes bot detection out of the box
+- **Stealth Chromium + zero-password login** - `cell login <url>` opens a clean browser on your host, you log in, press Enter; cookies and localStorage sync to the container. The agent never sees your password. Anti-fingerprint Playwright replays sessions that pass Cloudflare and Kasada
 - **Remote desktop** - VNC and RDP into the container to watch or interact with GUI apps
-- **1Password secrets** - API keys resolved at runtime, never written to disk
+- **1Password secrets** - list document names in `.devcell.toml`; fields are injected as env vars into the container at runtime, written to a RAM-only tmpfs, gone when the container stops
+- **Docker or VM engine** - default: Docker container. Add `--macos` to provision a Debian ARM64 VM via Vagrant + UTM instead — same nixhome toolchain, same commands, no Docker Desktop required
 - **7 image stacks** - from minimal (`base`) to everything-included (`ultimate`)
-- **Local ollama models** - route Claude through local models, ranked by SWE-Bench scores
+- **Model ranking** - `cell models` shows cloud models (Anthropic, OpenAI, Google via OpenRouter) and local ollama models ranked by SWE-Bench score and speed, side by side
 
 ## Stacks
 
-Seven stacks, published to `ghcr.io/dimmkirr/devcell`. Multi-arch: linux/amd64, linux/arm64.
+Seven stacks, published to `public.ecr.aws/w1l3v2k8/devcell`. Multi-arch: linux/amd64, linux/arm64.
 
 | Stack | What's inside |
 |---|---|
@@ -38,9 +39,40 @@ Seven stacks, published to `ghcr.io/dimmkirr/devcell`. 
Multi-arch: linux/amd64, | **node** | base + Node.js 22, npm, stealth Chromium | | **python** | base + Python 3.13, uv, stealth Chromium | | **fullstack** | go + node + python | -| **electronics** | base + GUI desktop + KiCad, ngspice, ESPHome, wokwi-cli | +| **electronics** | base + GUI desktop + KiCad, ngspice, ESPHome, PlatformIO, wokwi-cli | | **ultimate** | fullstack + GUI desktop, all MCP servers, Inkscape, KiCad *(default)* | +Add-on modules (set `modules = ["android"]` in `.devcell.toml`): + +| Module | What's inside | +|---|---| +| **android** | ADB + fastboot (all platforms), Android SDK + build-tools + emulator + apktool + jadx (x86_64 only) | +| **desktop** | GUI desktop: VNC, RDP, Fluxbox, PulseAudio | +| **scraping** | Playwright stealth scripts, anti-fingerprint Chromium config | +| **infra** | Cloud CLI tools: AWS, GCP, Azure | + +## Vagrant engine (no Docker required) + +Run cells as native VMs instead of Docker containers — useful for Apple Silicon without Docker Desktop, or when you need full Linux kernel features (KVM, `/dev/kvm`). + +```bash +cell claude --macos # provision Debian ARM64 VM via UTM, then open Claude Code +cell build --macos # re-apply nixhome flake inside the VM +cell build --update --macos # nix flake update inside VM, then re-provision +cell rdp --list # shows docker + vagrant cells side by side +``` + +Set permanently in `.devcell.toml`: + +```toml +[cell] +engine = "vagrant" +vagrant_provider = "utm" # utm (macOS) or libvirt (Linux) +vagrant_box = "utm/bookworm" +``` + +On first run the CLI scaffolds a `Vagrantfile`, starts the VM, installs Nix single-user, and applies the same home-manager configuration used by Docker images. Subsequent runs detect whether provisioning is needed and skip it if the binary is already present. + ## MCP servers Baked into the image and auto-merged into each agent's config at container startup. User-defined servers are preserved. 
Where applicable, the backing tools ship too: KiCad, Inkscape, and OpenTofu are installed alongside their MCP servers, so the agent can run `tofu plan`, analyze PCBs, or edit SVGs. New servers ship with image updates. @@ -60,6 +92,21 @@ Baked into the image and auto-merged into each agent's config at container start | Notion | Database and page management | OAuth 2.1 | | MCP-NixOS | Nix package search and docs | None | +## Browser login & anti-bot protection + +`cell login` lets the agent use authenticated sessions without ever seeing passwords: + +```bash +cell login https://example.com # opens a real browser on your host + # you log in normally, press Enter + # cookies + localStorage sync to the container +cell login --force https://... # wipe saved session and start fresh +``` + +**How it avoids bot detection:** the login browser opens with no CDP debugging port — no `--remote-debugging-port`, no special flags. Cloudflare, Kasada, and similar systems cannot detect it as automated. After you close the browser, a separate headless CDP instance reads the cookies from the same profile and writes `storage-state.json` for Playwright. The agent replays the session; your password is never exposed. + +The fingerprint (`User-Agent`, platform, browser brands) is read from your real installed Chrome binary and saved alongside the session so Patchright uses an identical identity. + ## Security - Project directory mounted at `/workspace`. Host filesystem is unreachable diff --git a/web/public/logos/vagrant.svg b/web/public/logos/vagrant.svg new file mode 100644 index 0000000..76b9686 --- /dev/null +++ b/web/public/logos/vagrant.svg @@ -0,0 +1,5 @@ + + + + + diff --git a/web/src/components/FeatureCards.astro b/web/src/components/FeatureCards.astro index ab79c23..114728d 100644 --- a/web/src/components/FeatureCards.astro +++ b/web/src/components/FeatureCards.astro @@ -36,8 +36,13 @@
-

Stealth Chromium built in

-

Anti-fingerprint Chromium with Playwright, ready for scraping and browser automation. Passes bot detection out of the box. Connect via VNC or RDP to watch it work.

+

Stealth Chromium, zero passwords

+

Run cell login on your host to log into any site — a clean browser opens (no CDP, no bot-detection triggers), you log in, press Enter. Cookies and localStorage sync to the container automatically. The agent never sees your password. Anti-fingerprint Playwright replays sessions that pass Cloudflare and Kasada.

+
+
+ +

Docker or VM — your choice

+

Default: Docker container, zero setup. Add --macos and devcell provisions a Debian ARM64 VM via Vagrant + UTM instead — same nixhome toolchain, same commands. Works for teams that can't use Docker Desktop or need native Linux on Apple Silicon.

diff --git a/web/src/components/StackTable.astro b/web/src/components/StackTable.astro index 5c1138f..c1bcbe0 100644 --- a/web/src/components/StackTable.astro +++ b/web/src/components/StackTable.astro @@ -39,6 +39,6 @@ const { stackColumns, stackFeatures } = Astro.props;

* Headless only. GUI desktop (VNC/RDP) available in electronics and ultimate stacks.

-

Multi-arch: linux/amd64 and linux/arm64. Published to ghcr.io/dimmkirr/devcell.

+

Multi-arch: linux/amd64 and linux/arm64. Published to public.ecr.aws/w1l3v2k8/devcell.

Base nix image size ~1.3 GB.

diff --git a/web/src/pages/index.astro b/web/src/pages/index.astro index e63d809..aeb1e1e 100644 --- a/web/src/pages/index.astro +++ b/web/src/pages/index.astro @@ -12,10 +12,11 @@ const stableVersion = process.env.STABLE_VERSION || ''; const stacks = [ { category: "AI Agents", tools: ["Claude Code", "OpenAI Codex", "OpenCode"] }, { category: "Languages & Runtimes", tools: ["Go", "Node.js", "Python", "Ruby", "Swift / LLVM"] }, - { category: "Infrastructure & IaC", tools: ["Terraform", "OpenTofu", "Docker", "Compose", "Packer", "Helm", "Nix"] }, + { category: "Infrastructure & IaC", tools: ["Terraform", "OpenTofu", "Docker", "Compose", "Vagrant", "Packer", "Helm", "Nix"] }, { category: "Finance & Data", tools: ["Yahoo Finance", "EdgarTools SEC", "FRED API"] }, { category: "Productivity & Travel", tools: ["Linear", "Notion", "Inoreader RSS", "Google Maps", "TripIt"] }, - { category: "Electronics & Design", tools: ["KiCad", "Inkscape (vector graphics)", "ngspice", "ESPHome", "wokwi-cli"] }, + { category: "Electronics & Design", tools: ["KiCad", "Inkscape (vector graphics)", "ngspice", "ESPHome", "PlatformIO", "wokwi-cli"] }, + { category: "Android & Mobile", tools: ["ADB", "apktool", "jadx", "Android SDK (x86_64, addon)"] }, { category: "Desktop & Browser", tools: ["VNC", "RDP", "Chromium (stealth)", "Playwright", "PulseAudio"] }, ]; @@ -31,6 +32,7 @@ const logos = [ { src: "/logos/terraform-icon.svg", alt: "Terraform", title: "Terraform" }, { src: "/logos/opentofu.svg", alt: "OpenTofu", title: "OpenTofu" }, { src: "/logos/docker-icon.svg", alt: "Docker", title: "Docker" }, + { src: "/logos/vagrant.svg", alt: "Vagrant", title: "Vagrant (VM engine)" }, { src: "/logos/nix.svg", alt: "Nix", title: "Nix" }, { src: "/logos/packer.svg", alt: "Packer", title: "Packer" }, { src: "/logos/git-icon.svg", alt: "Git", title: "Git" }, @@ -54,8 +56,8 @@ const stackFeatures = [ { name: "Node.js environment", note: "Node.js 22, npm", in: ["node","fullstack","ultimate"] }, { name: 
"Python environment", note: "Python 3.13, uv", in: ["python","fullstack","ultimate"] }, { name: "Infra tools", note: "Terraform, OpenTofu, Packer, Helm", in: ["go","fullstack","ultimate"] }, - { name: "Stealth browser", note: "Anti-fingerprint Chromium + Playwright", in: ["node","python","fullstack","ultimate"], partial: ["node","python","fullstack"] }, - { name: "Electronics & DIY", note: "KiCad, ngspice, ESPHome, wokwi-cli", in: ["electronics","ultimate"] }, + { name: "Stealth browser", note: "Anti-fingerprint Chromium + Playwright, anti-bot cookie sync", in: ["node","python","fullstack","ultimate"], partial: ["node","python","fullstack"] }, + { name: "Electronics & DIY", note: "KiCad, ngspice, ESPHome, PlatformIO, wokwi-cli", in: ["electronics","ultimate"] }, { name: "GUI desktop", note: "VNC + RDP + Fluxbox + PulseAudio", in: ["electronics","ultimate"] }, { name: "12+ MCP servers", note: "Finance, maps, productivity, vector graphics (Inkscape) — growing", in: ["ultimate"] }, ]; @@ -73,10 +75,18 @@ const logoGrid = [ const faqItems = [ { q: "Do I need an API key or Claude subscription?", a: "Bring your own license or model. Claude Max, Pro, and API keys all work — devcell starts the same client you already use, just inside a container. Same goes for Codex and OpenCode." }, + { q: "Can I run DevCell without Docker — e.g. natively on Apple Silicon?", + a: "Yes. Pass --engine=vagrant (or --macos) and devcell provisions a Debian ARM64 VM via Vagrant and UTM instead of a Docker container. The same nixhome toolchain installs via Nix inside the VM. All cell commands work identically: cell claude --macos, cell build --macos, cell rdp --macos. The VM shows up alongside Docker cells in cell rdp --list." }, + { q: "How does the agent log into websites without seeing passwords?", + a: "Run cell login on your host to open a real browser window — no CDP, no debugging ports that trip bot detection. 
Log in normally, press Enter, and devcell extracts cookies and localStorage via a headless CDP pass after the browser closes. Playwright inside the container replays the session without ever exposing credentials to the agent. Add --force to wipe a stale session and start fresh." }, + { q: "How does 1Password integration work?", + a: "List document names under [op] documents in devcell.toml. On cell start, the CLI reads each 1Password document on your host (requires op CLI and an active session), maps every field to an env var by label, and injects them into the container at runtime. Secrets are written to a RAM-only tmpfs at /run/secrets/ — they're gone when the container stops and never touch disk inside the container." }, { q: "How is DevCell different from Dev Containers?", a: "Dev Containers are editor-first — you write a Dockerfile + devcontainer.json per project for VS Code or GitHub Codespaces. DevCell is agent-first: one command, 7 pre-built Nix-pinned stacks, 12+ MCP servers auto-merged at startup, stealth Chromium + Playwright, VNC/RDP built in, and 1Password secret injection. No per-project Dockerfile maintenance." }, { q: "How is DevCell different from OpenClaw?", a: "OpenClaw is a multi-channel messaging gateway (WhatsApp, Telegram, Slack) with AI features. DevCell is a sandboxed coding environment for AI agents. Key differences: DevCell has mandatory container isolation (OpenClaw is network-exposed by default with multiple critical CVEs in 2026), curated MCP servers with backing tools shipped in the image (OpenClaw has an open marketplace with 1,184 malicious skills found), and Claude Max/Pro subscriptions work directly (OpenClaw requires API keys since Anthropic blocked subscription auth in Jan 2026)." }, + { q: "How does cell models work — do I need a local GPU?", + a: "No GPU needed. 
cell models fetches live models from OpenRouter (Anthropic, OpenAI, Google) and shows them alongside any locally installed ollama models, ranked by SWE-Bench score and speed. Set [llm.models] default in devcell.toml to pin a model; with --ollama enabled, cell claude auto-selects the best-ranked local model if nothing is pinned." }, { q: "Can the agent install packages inside the container?", a: "Yes. The agent can run apt, npm install, pip install, nix — whatever the project needs. Network access is unrestricted. Port forwarding and extra volume mounts are configurable in devcell.toml." }, { q: "What about file permissions?",