From 1af70e7122bcaeec7e1690b0f11834a1d5552962 Mon Sep 17 00:00:00 2001
From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Date: Fri, 24 Apr 2026 16:01:50 -0700
Subject: [PATCH 1/2] fix(skills): use command -v instead of which for codex
detection
`which` is not specified by POSIX; its behavior varies across platforms
(Debian debianutils, macOS csh script, bash function alias, missing entirely
on Busybox). In non-interactive shells spawned by tool runners, PATH and hash
state can diverge, so `which codex` can report "command not found" for a
binary that is on PATH and runnable. The Codex gate then silently skips with
CODEX_NOT_AVAILABLE.
Replace `which codex` with `command -v codex` across:
- codex/SKILL.md.tmpl (single source of truth for the codex binary check)
- scripts/resolvers/review.ts (3 callsites)
- scripts/resolvers/design.ts (3 callsites)
`command -v` is POSIX-specified and uses the shell's own resolution, so if
`command -v X` returns a path, X is runnable.
Update two test assertions that checked for the literal string "which codex"
in test/skill-validation.test.ts and test/skill-e2e-plan.test.ts.
autoplan/ already uses the `command -v codex` pattern; this aligns the rest of
the codebase with the existing convention.
Fixes #1193
---
codex/SKILL.md.tmpl | 2 +-
scripts/resolvers/design.ts | 6 +++---
scripts/resolvers/review.ts | 6 +++---
test/skill-e2e-plan.test.ts | 2 +-
test/skill-validation.test.ts | 4 ++--
5 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/codex/SKILL.md.tmpl b/codex/SKILL.md.tmpl
index c311fc80b7..f265062cff 100644
--- a/codex/SKILL.md.tmpl
+++ b/codex/SKILL.md.tmpl
@@ -42,7 +42,7 @@ assumptions, catches things you might miss. Present its output faithfully, not s
## Step 0: Check codex binary
```bash
-CODEX_BIN=$(which codex 2>/dev/null || echo "")
+CODEX_BIN=$(command -v codex || echo "")
[ -z "$CODEX_BIN" ] && echo "NOT_FOUND" || echo "FOUND: $CODEX_BIN"
```
diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts
index fc6d6ecee6..33247aab5e 100644
--- a/scripts/resolvers/design.ts
+++ b/scripts/resolvers/design.ts
@@ -10,7 +10,7 @@ export function generateDesignReviewLite(ctx: TemplateContext): string {
7. **Codex design voice** (optional, automatic if available):
\`\`\`bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`
If Codex is available, run a lightweight design check on the diff:
@@ -512,7 +512,7 @@ The screenshot file at \`/tmp/gstack-sketch.png\` can be referenced by downstrea
After the wireframe is approved, offer outside design perspectives:
\`\`\`bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`
If Codex is available, use AskUserQuestion:
@@ -688,7 +688,7 @@ ${optInSection}
**Check Codex availability:**
\`\`\`bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`
**If Codex is available**, launch both voices simultaneously:
diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts
index a0f29e1746..9a2194e011 100644
--- a/scripts/resolvers/review.ts
+++ b/scripts/resolvers/review.ts
@@ -266,7 +266,7 @@ export function generateCodexSecondOpinion(ctx: TemplateContext): string {
**Binary check first:**
\`\`\`bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`
Use AskUserQuestion (regardless of codex availability):
@@ -425,7 +425,7 @@ Every diff gets adversarial review from both Claude and Codex. LOC is not a prox
DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
# Legacy opt-out — only gates Codex passes, Claude always runs
OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true)
echo "DIFF_SIZE: $DIFF_TOTAL"
@@ -554,7 +554,7 @@ thorough review.
**Check tool availability:**
\`\`\`bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`
Use AskUserQuestion:
diff --git a/test/skill-e2e-plan.test.ts b/test/skill-e2e-plan.test.ts
index 269c889c39..b6d8c3f0d4 100644
--- a/test/skill-e2e-plan.test.ts
+++ b/test/skill-e2e-plan.test.ts
@@ -776,7 +776,7 @@ Write your summary to ${testDir}/${testName}-summary.md`,
const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();
// All skills should have codex availability check
- expect(summary).toMatch(/which codex/);
+ expect(summary).toMatch(/command -v codex/);
// All skills should have fallback behavior
expect(summary).toMatch(/fallback|subagent|unavailable|not available|skip/);
// All skills should show it's optional/non-blocking
diff --git a/test/skill-validation.test.ts b/test/skill-validation.test.ts
index 625bc0a169..b7d696c39d 100644
--- a/test/skill-validation.test.ts
+++ b/test/skill-validation.test.ts
@@ -1315,9 +1315,9 @@ describe('Codex skill', () => {
expect(content).toContain('gstack-review-log');
});
- test('codex/SKILL.md uses which for binary discovery, not hardcoded path', () => {
+ test('codex/SKILL.md uses command -v for binary discovery, not hardcoded path', () => {
const content = fs.readFileSync(path.join(ROOT, 'codex', 'SKILL.md'), 'utf-8');
- expect(content).toContain('which codex');
+ expect(content).toContain('command -v codex');
expect(content).not.toContain('/opt/homebrew/bin/codex');
});
From b6102d8df16e5aa6f7f8079f79d871b969e85bd7 Mon Sep 17 00:00:00 2001
From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com>
Date: Fri, 24 Apr 2026 16:01:58 -0700
Subject: [PATCH 2/2] chore: regenerate SKILL.md files and refresh golden
fixtures
Output of `bun run gen:skill-docs --host all` following the `which` ->
`command -v` swap in codex/SKILL.md.tmpl and scripts/resolvers/*.ts. Refreshed
test/fixtures/golden/claude-ship-SKILL.md and factory-ship-SKILL.md from
the regenerated ship SKILL.md for each host. codex-ship-SKILL.md was
already byte-identical to the regenerated .agents/ output and did not
need a refresh.
---
codex/SKILL.md | 2 +-
design-consultation/SKILL.md | 2 +-
design-review/SKILL.md | 2 +-
office-hours/SKILL.md | 4 ++--
plan-ceo-review/SKILL.md | 2 +-
plan-design-review/SKILL.md | 2 +-
plan-devex-review/SKILL.md | 2 +-
plan-eng-review/SKILL.md | 2 +-
review/SKILL.md | 2 +-
ship/SKILL.md | 4 ++--
test/fixtures/golden/claude-ship-SKILL.md | 4 ++--
test/fixtures/golden/factory-ship-SKILL.md | 4 ++--
12 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/codex/SKILL.md b/codex/SKILL.md
index 4d98da6138..a6bae60aa5 100644
--- a/codex/SKILL.md
+++ b/codex/SKILL.md
@@ -1092,7 +1092,7 @@ assumptions, catches things you might miss. Present its output faithfully, not s
## Step 0: Check codex binary
```bash
-CODEX_BIN=$(which codex 2>/dev/null || echo "")
+CODEX_BIN=$(command -v codex || echo "")
[ -z "$CODEX_BIN" ] && echo "NOT_FOUND" || echo "FOUND: $CODEX_BIN"
```
diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md
index 20c0fc1a55..ef7c1fb950 100644
--- a/design-consultation/SKILL.md
+++ b/design-consultation/SKILL.md
@@ -1345,7 +1345,7 @@ If user chooses B, skip this step and continue.
**Check Codex availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
**If Codex is available**, launch both voices simultaneously:
diff --git a/design-review/SKILL.md b/design-review/SKILL.md
index c58ce6719d..ee8d1673df 100644
--- a/design-review/SKILL.md
+++ b/design-review/SKILL.md
@@ -1965,7 +1965,7 @@ Record baseline design score and AI slop score at end of Phase 6.
**Check Codex availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
**If Codex is available**, launch both voices simultaneously:
diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md
index ece2ab89df..ec4f2eb150 100644
--- a/office-hours/SKILL.md
+++ b/office-hours/SKILL.md
@@ -1470,7 +1470,7 @@ Use AskUserQuestion to confirm. If the user disagrees with a premise, revise und
**Binary check first:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
Use AskUserQuestion (regardless of codex availability):
@@ -1740,7 +1740,7 @@ The screenshot file at `/tmp/gstack-sketch.png` can be referenced by downstream
After the wireframe is approved, offer outside design perspectives:
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
If Codex is available, use AskUserQuestion:
diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md
index 0791beeb0c..aba93691ed 100644
--- a/plan-ceo-review/SKILL.md
+++ b/plan-ceo-review/SKILL.md
@@ -1865,7 +1865,7 @@ thorough review.
**Check tool availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
Use AskUserQuestion:
diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md
index 396df1217b..76b00775ea 100644
--- a/plan-design-review/SKILL.md
+++ b/plan-design-review/SKILL.md
@@ -1519,7 +1519,7 @@ If user chooses B, skip this step and continue.
**Check Codex availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
**If Codex is available**, launch both voices simultaneously:
diff --git a/plan-devex-review/SKILL.md b/plan-devex-review/SKILL.md
index 3869d47d8c..5110937073 100644
--- a/plan-devex-review/SKILL.md
+++ b/plan-devex-review/SKILL.md
@@ -1861,7 +1861,7 @@ thorough review.
**Check tool availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
Use AskUserQuestion:
diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md
index 06fbf7b805..d62a32f118 100644
--- a/plan-eng-review/SKILL.md
+++ b/plan-eng-review/SKILL.md
@@ -1478,7 +1478,7 @@ thorough review.
**Check tool availability:**
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
Use AskUserQuestion:
diff --git a/review/SKILL.md b/review/SKILL.md
index a2092af9e1..c7d63508c3 100644
--- a/review/SKILL.md
+++ b/review/SKILL.md
@@ -1818,7 +1818,7 @@ Every diff gets adversarial review from both Claude and Codex. LOC is not a prox
DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
# Legacy opt-out — only gates Codex passes, Claude always runs
OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true)
echo "DIFF_SIZE: $DIFF_TOTAL"
diff --git a/ship/SKILL.md b/ship/SKILL.md
index 7548415246..5f8ac7bbc4 100644
--- a/ship/SKILL.md
+++ b/ship/SKILL.md
@@ -2191,7 +2191,7 @@ Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "is
7. **Codex design voice** (optional, automatic if available):
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
If Codex is available, run a lightweight design check on the diff:
@@ -2544,7 +2544,7 @@ Every diff gets adversarial review from both Claude and Codex. LOC is not a prox
DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
# Legacy opt-out — only gates Codex passes, Claude always runs
OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true)
echo "DIFF_SIZE: $DIFF_TOTAL"
diff --git a/test/fixtures/golden/claude-ship-SKILL.md b/test/fixtures/golden/claude-ship-SKILL.md
index 7548415246..5f8ac7bbc4 100644
--- a/test/fixtures/golden/claude-ship-SKILL.md
+++ b/test/fixtures/golden/claude-ship-SKILL.md
@@ -2191,7 +2191,7 @@ Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "is
7. **Codex design voice** (optional, automatic if available):
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
If Codex is available, run a lightweight design check on the diff:
@@ -2544,7 +2544,7 @@ Every diff gets adversarial review from both Claude and Codex. LOC is not a prox
DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
# Legacy opt-out — only gates Codex passes, Claude always runs
OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true)
echo "DIFF_SIZE: $DIFF_TOTAL"
diff --git a/test/fixtures/golden/factory-ship-SKILL.md b/test/fixtures/golden/factory-ship-SKILL.md
index 9a5e09b6ad..6d71afc1ad 100644
--- a/test/fixtures/golden/factory-ship-SKILL.md
+++ b/test/fixtures/golden/factory-ship-SKILL.md
@@ -2182,7 +2182,7 @@ Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "is
7. **Codex design voice** (optional, automatic if available):
```bash
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
```
If Codex is available, run a lightweight design check on the diff:
@@ -2535,7 +2535,7 @@ Every diff gets adversarial review from both Claude and Codex. LOC is not a prox
DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0")
DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0")
DIFF_TOTAL=$((DIFF_INS + DIFF_DEL))
-which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
+command -v codex >/dev/null 2>&1 && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
# Legacy opt-out — only gates Codex passes, Claude always runs
OLD_CFG=$($GSTACK_ROOT/bin/gstack-config get codex_reviews 2>/dev/null || true)
echo "DIFF_SIZE: $DIFF_TOTAL"