garrytan · garrytan · May 1, 2026 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,39 @@
+# Force LF on text files we parse with `\n`-anchored regexes (frontmatter,
+# YAML, markdown structure tests). Without this, Windows checkouts with
+# core.autocrlf=true convert these to CRLF and break tests that match
+# /^---\n...\n---/ against SKILL.md.tmpl frontmatter, etc.
+*.md         text eol=lf
+*.tmpl       text eol=lf
+*.yml        text eol=lf
+*.yaml       text eol=lf
+*.json       text eol=lf
+*.toml       text eol=lf
+
+# Bash scripts must always use LF — CRLF in bash scripts produces bizarre
+# "bad interpreter" / "command not found" errors on Linux runners.
+*.sh         text eol=lf
+*.bash       text eol=lf
+
+# Extensionless executables (top-level setup script + bin/gstack-* helpers).
+# These are bash scripts checked into git without a `.sh` suffix. Without
+# explicit eol=lf, Windows checkouts with core.autocrlf=true convert them
+# to CRLF and break both `\n`-anchored regex tests (test/setup-codesign.test.ts)
+# and shebang resolution if the script is ever executed on Linux.
+setup        text eol=lf
+bin/*        text eol=lf
+**/scripts/* text eol=lf
+
+# TypeScript/JavaScript: LF for portability across the bun toolchain.
+*.ts         text eol=lf
+*.tsx        text eol=lf
+*.js         text eol=lf
+*.mjs        text eol=lf
+*.cjs        text eol=lf
+
+# Binary files — never touch. Keep last so these override bin/* and **/scripts/* above.
+*.png        binary
+*.jpg        binary
+*.jpeg       binary
+*.gif        binary
+*.ico        binary
+*.pdf        binary
diff --git a/.github/workflows/windows-free-tests.yml b/.github/workflows/windows-free-tests.yml
@@ -0,0 +1,98 @@
+name: Windows Free Tests
+
+# Curated subset of the free test suite that runs on windows-latest.
+#
+# Codex's v1.18.0.0 review flagged that the existing evals.yml workflow uses
+# a Linux container, so a windows-latest matrix entry there isn't a drop-in.
+# This workflow is non-container, runs the curated Windows-safe subset, plus
+# targeted resolver tests that exercise the Bun.which-based claude binary
+# resolution + the GSTACK_CLAUDE_BIN override path on Windows.
+#
+# What this DOES NOT do (out of scope for v1.20.0.0):
+#   - Run the full free suite on Windows. The 24 tests that hardcode /bin/sh,
+#     spawn('sh',...), or raw /tmp/ paths are excluded by scripts/test-free-shards.ts
+#     --windows-only. They need POSIX-bound surfaces to be ported off shell
+#     primitives before they can run on Windows. Tracked as a follow-up TODO.
+#   - Run Playwright/browser-backed tests. Browse server bring-up on Windows is
+#     a separate concern (PR #1238 windows-pty-bun-pty-fix is in flight).
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: windows-free-${{ github.head_ref || github.run_id }}  # head_ref is empty on workflow_dispatch
+  cancel-in-progress: true
+
+jobs:
+  windows-free-tests:
+    timeout-minutes: 15
+    runs-on: windows-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v1
+        with:
+          bun-version: latest
+
+      - name: Configure git identity (required by tests that init temp repos)
+        shell: bash
+        run: |
+          git config --global user.email "windows-ci@gstack.test"
+          git config --global user.name "Windows CI"
+          git config --global init.defaultBranch main
+
+      - name: Install dependencies
+        run: bun install --frozen-lockfile
+
+      - name: Build server-node.mjs (required by Windows browse path)
+        # On Windows, browse/src/cli.ts throws at module load when
+        # server-node.mjs is absent, because Bun can't drive Playwright's
+        # Chromium there (oven-sh/bun#4253). Any test that transitively
+        # imports cli.ts therefore needs the bundle on disk. Only the
+        # Node-compatible server bundle is built; a full `bun run build`
+        # would also compile every binary — slow and unnecessary for tests.
+        shell: bash
+        run: bash browse/scripts/build-node-server.sh
+
+      - name: Generate host SKILL.md outputs (.agents, .factory)
+        # test/gen-skill-docs.test.ts holds golden-file regression tests that
+        # read .agents/skills/gstack-ship/SKILL.md and
+        # .factory/skills/gstack-ship/SKILL.md. Both files are gitignored and
+        # produced on demand by gen:skill-docs. The existing eval workflow
+        # regenerates them inside its own Mac/Linux pipeline; this lane does
+        # not share that pipeline, so it has to regenerate them explicitly.
+        shell: bash
+        run: bun run gen:skill-docs --host all
+
+      # This Windows job covers the new portability work delivered by this
+      # PR, not the entire free suite. After v1.20.0.0 ships, full-suite Windows
+      # parity is a P4 follow-up TODO that depends on porting many tests off
+      # POSIX-bound surfaces (raw /tmp paths, /bin/bash hardcodes, bash
+      # shebang spawns, mode-bit assertions, deleted v1.14 sidebar refs, etc).
+      #
+      # The curated subset enumeration in scripts/test-free-shards.ts is
+      # kept for future expansion — `bun run test:windows --list` gives
+      # contributors a starting point to grow Windows coverage incrementally.
+      #
+      # What is verified here is exactly the new code paths v1.20.0.0 ships:
+      #  - bin/gstack-paths state-root resolution (test/gstack-paths.test.ts)
+      #  - browse/src/claude-bin.ts Bun.which wrapper + override + arg-prefix
+      #    resolution including the GSTACK_CLAUDE_BIN=wsl PATHEXT path
+      #    (browse/test/claude-bin.test.ts)
+      #  - scripts/test-free-shards.ts curation logic itself
+      #    (test/test-free-shards.test.ts)
+
+      - name: Show curated subset (informational — for future expansion)
+        continue-on-error: true
+        shell: bash
+        run: bun run scripts/test-free-shards.ts --windows-only --list
+
+      - name: Verify new portability work on Windows
+        # 31 tests covering the code paths added by v1.20.0.0. They MUST
+        # pass for the release-note headline ("curated Windows lane added")
+        # to be truthful.
+        shell: bash
+        run: bun test test/gstack-paths.test.ts browse/test/claude-bin.test.ts test/test-free-shards.test.ts
diff --git a/AGENTS.md b/AGENTS.md
@@ -6,44 +6,106 @@ designer, QA lead, release engineer, debugger, and more.
 
 ## Available skills
 
-Skills live in `.agents/skills/`. Invoke them by name (e.g., `/office-hours`).
+Skills live in `.agents/skills/` (or `~/.claude/skills/gstack/` on Claude Code).
+Invoke them by name (e.g., `/office-hours`).
+
+### Plan-mode reviews
 
 | Skill | What it does |
 |-------|-------------|
 | `/office-hours` | Start here. Reframes your product idea before you write code. |
 | `/plan-ceo-review` | CEO-level review: find the 10-star product in the request. |
 | `/plan-eng-review` | Lock architecture, data flow, edge cases, and tests. |
 | `/plan-design-review` | Rate each design dimension 0-10, explain what a 10 looks like. |
+| `/plan-devex-review` | DX-mode review: TTHW (time to hello world), magical moments, friction points, persona traces. |
+| `/plan-tune` | Self-tune AskUserQuestion sensitivity per question. |
+| `/autoplan` | One command runs CEO → design → eng → DX review. |
 | `/design-consultation` | Build a complete design system from scratch. |
+
+### Implementation + review
+
+| Skill | What it does |
+|-------|-------------|
 | `/review` | Pre-landing PR review. Finds bugs that pass CI but break in prod. |
-| `/debug` | Systematic root-cause debugging. No fixes without investigation. |
-| `/design-review` | Design audit + fix loop with atomic commits. |
+| `/codex` | Second opinion via OpenAI Codex. Review, challenge, or consult modes. |
+| `/investigate` | Systematic root-cause debugging. No fixes without investigation. |
+| `/design-review` | Live-site visual audit + fix loop with atomic commits. |
+| `/design-shotgun` | Generate multiple AI design variants, comparison board, iterate. |
+| `/design-html` | Generate production-quality Pretext-native HTML/CSS. |
+| `/devex-review` | Live developer experience audit (TTHW measured against the real flow). |
 | `/qa` | Open a real browser, find bugs, fix them, re-verify. |
-| `/qa-only` | Same as /qa but report only — no code changes. |
-| `/ship` | Run tests, review, push, open PR. One command. |
+| `/qa-only` | Same methodology as /qa but report only — no code changes. |
+
+### Release + deploy
+
+| Skill | What it does |
+|-------|-------------|
+| `/ship` | Run tests, review, push, open PR. Workspace-aware version queue. |
+| `/land-and-deploy` | Merge the PR, wait for CI and deploy, verify production health. |
+| `/canary` | Post-deploy monitoring loop using the browse daemon. |
+| `/landing-report` | Read-only dashboard for the workspace-aware ship queue. |
 | `/document-release` | Update all docs to match what you just shipped. |
+| `/setup-deploy` | One-time deploy config detection (Fly.io, Render, Vercel, etc.). |
+| `/gstack-upgrade` | Update gstack to the latest version. |
+
+### Operational + memory
+
+| Skill | What it does |
+|-------|-------------|
+| `/context-save` | Save working context (git state, decisions, remaining work). |
+| `/context-restore` | Resume from a saved context, even across Conductor workspaces. |
+| `/learn` | Manage what gstack learned across sessions. |
 | `/retro` | Weekly retro with per-person breakdowns and shipping streaks. |
+| `/health` | Code quality dashboard (type checker, linter, tests, dead code). |
+| `/benchmark` | Performance regression detection (page load, Core Web Vitals). |
+| `/benchmark-models` | Cross-model benchmark for skills (Claude, GPT, Gemini side-by-side). |
+| `/cso` | OWASP Top 10 + STRIDE security audit. |
+| `/setup-gbrain` | Set up gbrain for cross-machine session memory sync. |
+
+### Browser + agent integration
+
+| Skill | What it does |
+|-------|-------------|
 | `/browse` | Headless browser — real Chromium, real clicks, ~100ms/command. |
+| `/open-gstack-browser` | Launch the visible GStack Browser with sidebar + stealth. |
 | `/setup-browser-cookies` | Import cookies from your real browser for authenticated testing. |
+| `/pair-agent` | Pair a remote AI agent (OpenClaw, Codex, etc.) with your browser. |
+
+### Safety + scoping
+
+| Skill | What it does |
+|-------|-------------|
 | `/careful` | Warn before destructive commands (rm -rf, DROP TABLE, force-push). |
 | `/freeze` | Lock edits to one directory. Hard block, not just a warning. |
 | `/guard` | Activate both careful + freeze at once. |
 | `/unfreeze` | Remove directory edit restrictions. |
-| `/gstack-upgrade` | Update gstack to the latest version. |
+| `/make-pdf` | Turn any markdown file into a publication-quality PDF. |
 
 ## Build commands
 
 ```bash
 bun install              # install dependencies
-bun test                 # run tests (free, <5s)
+bun test                 # run free tests (no API spend)
+bun run test:windows     # curated Windows-safe subset (runs on windows-latest)
 bun run build            # generate docs + compile binaries
 bun run gen:skill-docs   # regenerate SKILL.md files from templates
 bun run skill:check      # health dashboard for all skills
 ```
 
+## Platform support
+
+- **macOS** + **Linux**: full test suite supported.
+- **Windows**: curated Windows-safe subset runs on `windows-latest` via the
+  `windows-free-tests` CI job. Setup script (`./setup`) requires Git Bash or
+  MSYS today; native PowerShell support is a future expansion. The `bin/gstack-paths`
+  helper resolves state roots through `CLAUDE_PLUGIN_DATA` / `GSTACK_HOME` so plugin
+  installs work on every platform.
+
 ## Key conventions
 
 - SKILL.md files are **generated** from `.tmpl` templates. Edit the template, not the output.
 - Run `bun run gen:skill-docs --host codex` to regenerate Codex-specific output.
 - The browse binary provides headless browser access. Use `$B <command>` in skills.
 - Safety skills (careful, freeze, guard) use inline advisory prose — always confirm before destructive operations.
+- State paths resolve via `bin/gstack-paths` (sourced via `eval "$(...)"`). Honors `GSTACK_HOME`, `CLAUDE_PLUGIN_DATA`, `CLAUDE_PLANS_DIR`.
+- The `claude` CLI binary resolves via `browse/src/claude-bin.ts` (`Bun.which()` + `GSTACK_CLAUDE_BIN` override). Set `GSTACK_CLAUDE_BIN=wsl` plus `GSTACK_CLAUDE_BIN_ARGS='["claude"]'` to run Claude through WSL on Windows.