diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index de81d11..085d773 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -15,9 +15,20 @@ # Workflow and CI configuration — the most security-sensitive paths. /.github/ @daniel-pittman -# Core source — the bash CLI script. -# Changes here can affect runtime behaviour or surface secrets. +# Core source — the bash CLI script and the parallel Python implementation +# the MCP server uses. Changes to any of these can affect runtime behaviour +# or surface secrets. The bash script and the Python modules are PARALLEL +# implementations of the same Bitbucket REST contract; review of one often +# implies review of the other (see CONTRIBUTING.md for the parity rule). /bb @daniel-pittman +/bb_api.py @daniel-pittman +/bb_ops.py @daniel-pittman +/git_ops.py @daniel-pittman +/mcp_server.py @daniel-pittman # Release versioning. A `VERSION` bump corresponds 1:1 with a release tag. /VERSION @daniel-pittman + +# Packaging + agent definition. +/pyproject.toml @daniel-pittman +/agents/ @daniel-pittman diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff765a9..09d3b3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Continuous integration: bash syntax + tooling sanity. +# Continuous integration: bash syntax + Python tests. # # This workflow needs NO secrets and is safe to run on every push and PR, # including PRs from forks. The secret-sensitive Claude review workflows are @@ -17,6 +17,16 @@ permissions: jobs: syntax: runs-on: ubuntu-latest + strategy: + # Enforce the Python compatibility floor advertised in pyproject.toml + # (requires-python = ">=3.10"). 3.10 is the floor because bb_api uses + # PEP 604 union syntax (`X | None`) and PEP 585 builtin generics + # (`list[str]`) as runtime-resolved annotations; the former needs 3.10 (the latter 3.9). + # 3.12 catches removed-feature regressions. fail-fast: false so a + # problem in one version doesn't mask problems in others. 
+ fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12"] steps: # SHA-pinned actions/checkout v4.2.2 - name: Check out the repository @@ -36,3 +46,22 @@ jobs: command -v curl >/dev/null || { echo "curl missing"; exit 1; } bash --version | head -1 jq --version + + # SHA-pinned actions/setup-python v5.3.0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0 + with: + python-version: ${{ matrix.python-version }} + + - name: Python syntax check + run: python -m py_compile bb_api.py bb_ops.py git_ops.py mcp_server.py + + # Install pytest only. The MCP server's heavy dependencies (mcp) are + # exercised at runtime, not here — the unit tests mock the network and + # the MCP transport entirely, so pulling them into CI would just slow + # the matrix down for no signal. + - name: Install pytest + run: python -m pip install --upgrade pip pytest + + - name: Run pytest + run: python -m pytest tests/ -v diff --git a/.gitignore b/.gitignore index a9ad188..76006c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,18 @@ .env .idea/ +.claude/ + +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +.venv/ +venv/ +.pytest_cache/ +.ruff_cache/ +.mypy_cache/ +build/ +dist/ + +# macOS +.DS_Store diff --git a/CLAUDE.md b/CLAUDE.md index b4cd316..f3d85eb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,67 +4,115 @@ This file provides guidance to Claude Code when working with this repository. ## Overview -`bb` is a bash CLI wrapper around the Bitbucket Cloud REST API. It provides commands for managing pipelines, pull requests, branches, and repositories from the terminal. +`bb` is a Bitbucket Cloud client with two parallel implementations of the same REST contract: + +- **Bash CLI** (`bb`) — human-facing, depends only on `curl` + `jq`. 
+- **Python MCP server** (`mcp_server.py` + `bb_api.py` + `bb_ops.py` + `git_ops.py`) — exposes the same Bitbucket surface plus git-context helpers as MCP tools that Claude Code (or any MCP-aware client) can call directly. + +Both implementations target Bitbucket Cloud REST API v2.0. Neither wraps the other — they speak HTTP directly. ## Project Structure ``` bitbucket-cli/ -├── bb # Main CLI script (bash) -├── README.md # User documentation -├── LICENSE # MIT license -├── CLAUDE.md # This file -├── .env.example # Example environment config -└── .gitignore # Git ignore rules +├── bb # Bash CLI (cmd_* functions, ~1.4k lines) +├── bb_api.py # urllib-based HTTP client, pagination, redacting +├── bb_ops.py # Bitbucket operations: pipelines / PRs / repos / branches / vars / commits +├── git_ops.py # subprocess wrappers: branch / status / remote / commits / diffs +├── mcp_server.py # FastMCP tool registry + self-bootstrap venv +├── agents/bitbucket.md # Generic agent definition for the MCP server +├── tests/ # pytest suite (~360 tests) +├── pyproject.toml # Python packaging + pytest config +├── docs/img/ # social-preview.png and other assets +├── README.md # User-facing docs (install, usage, MCP setup) +├── CONTRIBUTING.md # Parity rule, GitFlow, branch protection +├── SECURITY.md # Disclosure policy + maintainer checklist +└── .github/ # CI, Claude code-review / security-review workflows, CODEOWNERS ``` ## Key Technical Details -- **Language**: Pure bash (no external dependencies beyond curl and jq) +- **Languages**: bash (3.2+ — macOS system bash is supported) and Python (3.10+) - **API**: Bitbucket Cloud REST API v2.0 (`https://api.bitbucket.org/2.0`) -- **Auth**: HTTP Basic authentication using Atlassian API tokens - - `BB_USER`: Bitbucket account email address - - `BB_TOKEN`: API token from id.atlassian.com - - `BB_WORKSPACE`: Bitbucket workspace slug +- **Auth**: HTTP Basic using Atlassian API tokens + - `BB_USER`: Bitbucket account email + - `BB_TOKEN`: API token 
+ from id.atlassian.com (never echoed) + - `BB_WORKSPACE`: workspace slug +- **MCP runtime**: stdlib-only at runtime; the `mcp` package is the only third-party dep, installed into a self-bootstrapped venv at `$XDG_DATA_HOME/bitbucket-cli/venv` (default `~/.local/share/bitbucket-cli/venv`) on first invocation. ## Configuration -The script loads config from two locations (in order): -1. `~/.config/bb/config` - User config file -2. `.env` in script directory - Local override (gitignored) +Loaded in this order (later overrides earlier): +1. `~/.config/bb/config` — user config +2. `.env` in script directory — local override (gitignored) +3. Environment variables (highest priority) ## Code Conventions -- Functions are named `cmd_*` for user-facing commands -- Helper functions: `bb_get`, `bb_post`, `bb_put`, `bb_delete` for API calls -- `detect_repo()` auto-detects repo from git remote if not provided -- `format_state()` normalizes pipeline/PR states to 4-char display codes +### Bash (`bb`) +- User-facing commands: `cmd_*` functions. +- HTTP helpers: `bb_get` / `bb_post` / `bb_put` / `bb_delete`. +- `detect_repo` / `repo_path` resolve the repo from git remote when not supplied. +- Boundary validation via helpers like `_require_build_number` (rejects non-numeric). +- Variables are passed to `jq -Rs` with NUL delimiters to prevent injection. +- Error rc capture: `if cmd; then ...; else local rc=$?; ...; exit $rc; fi` (never lose the exit code). + +### Python (`bb_api.py`, `bb_ops.py`, `git_ops.py`, `mcp_server.py`) +- `BBClient` injected as first arg into every `bb_ops` function. +- `bb_ops.<resource>_<verb>` naming (e.g. `pipeline_trigger`, `pr_create`). +- `_is_positive_int` guard rejects bool (the bool-is-int trap). +- MCP tools resolve repo via `_resolve_repo` (rejects malformed/`.`/`..`/whitespace BEFORE any network call). +- Error envelopes route ALL string fields (`message`, `body`, `stderr`, `url`) through `_safe_text` / `_redact_url` — single chokepoint, not per-field. 
+- `_error_dict_with(e, ...)` threads request identifiers (pr_id, number, step_index) into the error dict for correlation. + +### Parallel-implementation parity rule +The bash and Python sides implement the **same Bitbucket REST contract** independently. When a defect surfaces in either side (URL construction, body shape, parameter naming), the fix lands in BOTH paths. Tests verify the correct contract — never pin existing buggy behavior. See CONTRIBUTING.md. ## Adding New Commands -1. Create a `cmd_yourcommand()` function -2. Add it to the case statement at the bottom of the script -3. Add help text in `cmd_help()` -4. Update README.md with usage examples +For end-user CLI features: +1. Add `cmd_yourcommand()` in `bb`. +2. Wire into the case statement at the bottom of the script. +3. Add help text in `cmd_help()`. +4. Update README.md usage examples. -## Testing +For MCP-tool features: +1. Add a `bb_ops.<resource>_<verb>()` Python function. +2. Add pytest coverage in `tests/` (assert URL + method + body shape — not just status). +3. Add a thin `@mcp.tool()` wrapper in `mcp_server.py` that calls `_resolve_repo`, invokes the ops function, returns `{"ok": True, ...}` on success and `_error_dict_with(e, ...)` on failure. +4. Update the tool-surface table in `agents/bitbucket.md`. -Manual testing against a Bitbucket workspace: +If the new feature applies to both surfaces (the dominant case), do both. Parity is the default. + +## Testing ```bash -# Verify auth works -bb repos +# Python tests (run from repo root) +pytest -# Test pipeline commands -bb pipelines your-repo -bb pipeline your-repo 1 +# Specific test file +pytest tests/test_mcp_server.py -v -# Test PR commands -bb prs your-repo +# Bash smoke tests (manual, against your workspace) +bb whoami +bb repos +bb pipelines your-repo ``` +CI runs `pytest` on every PR. Bash side is smoke-tested manually before release. + +## Security Posture + +- BB_TOKEN never echoed (whoami, error dicts, log lines). 
+- URL credential leaks (`https://user:token@host`) stripped in all redactors. +- Signed-URL query parameters (AWS X-Amz-Signature, Azure SAS, GCP signing, bearer / access_token / api_key) stripped from error URLs. +- Cross-host Authorization stripping on redirect (so the Bitbucket Basic header never reaches S3). +- Pipeline variable values masked as `KEY=***` when echoed back to the user. +- subprocess calls use `GIT_TERMINAL_PROMPT=0`, `GIT_ASKPASS=""`, `stdin=DEVNULL`, timeout — no interactive prompts can hang the MCP server. +- See `SECURITY.md` for the full posture and disclosure policy. + ## Known Limitations -- Manual pipeline step approval requires the Bitbucket UI (API doesn't support it) -- Large log outputs may be truncated by the API -- Rate limiting is not handled (Bitbucket has generous limits) +- Manual pipeline step approval requires the Bitbucket UI (REST API doesn't support it). +- Diffs from `git_uncommitted_changes` are capped at 1 MiB (with truncation marker); untracked file list capped at 10 000 entries. +- Rate limiting is not handled explicitly (Bitbucket Cloud has generous limits; if you hit one, the MCP error envelope surfaces the 429 cleanly). diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b42b441..4198531 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,7 @@ The first time an outside contributor opens a PR, GitHub holds Actions execution ## CI and review automation -- **`syntax` (required)** — bash syntax + tooling sanity. Must pass before merge into `develop` or `main`. No secrets needed; runs on every push and PR including from forks. +- **`syntax` (required)** — bash syntax check, runtime-tooling presence check, and the Python test suite across Python 3.10 / 3.11 / 3.12. Must pass before merge into `develop` or `main`. No secrets needed; runs on every push and PR including from forks. - **Claude code review (advisory)** — automated PR review using a subscription-bound OAuth token. 
Output appears as a PR comment; not a merge gate. - **Claude security review (advisory)** — runs only on PRs targeting `main` or `develop`. Uses a metered Anthropic API key; also advisory. @@ -34,19 +34,39 @@ The interactive `@claude` bot is available for maintainer-triggered triage. **On - `detect_repo()` is the auto-detect entry point for resolving a repo from the local git remote. Reuse it; don't duplicate the parsing. - Updates to commands should be reflected in `README.md`, `CLAUDE.md`, and the inline `cmd_help()` block together — they all describe the same surface from different angles. -## No personal data in public code +## Bash and Python are parallel implementations -Examples, fixtures, docs, and comments must use **fictional names** and **generic institutional abbreviations** — not real colleagues, organizations, repositories, or workspace slugs from the maintainer's day-to-day use. Real names land in public git history permanently. Default substitutes: +The `bb` bash script and the Python modules (`bb_api.py`, `bb_ops.py`, `mcp_server.py`) are **parallel implementations of the same Bitbucket REST contract**, not layered on top of each other: -- Personal names: Alice Garcia, Bob Jones, Carol Lee. -- Institutional / workspace abbreviations: `acme`, `widget-co`, `example-workspace`. -- Emails: `user@example.com` (RFC-reserved domain; cannot accidentally collide). +``` +bb (bash) <--> Bitbucket REST API <--> bb_api / bb_ops (Python) + (single source + of truth) +``` -The discipline is at the moment of writing — easier than scrubbing later. +The MCP server talks to Bitbucket directly via Python; it does not shell out to `bb` and parse its output. + +**The parity rule:** when a test in `tests/test_bb_*.py` surfaces a defect in URL construction, body shape, parameter naming, or auth handling, the fix lands in the Python module *and* in `bb` if `bb` has the parallel logic. 
Tests verify the **correct** API contract — they do not pin existing buggy behaviour on either side. If you find yourself writing a test that only passes against the current implementation despite knowing the contract is wrong, that's a signal to fix the code, not pin the bug. + +Practical workflow when adding or changing an endpoint call: + +1. Write the test against what the Bitbucket API documents should happen. +2. Implement it correctly in the Python module. +3. Inspect `bb` — does it call the same endpoint with the same shape? If yes, mirror the fix. +4. Update both code paths in the same PR. + +## Testing + +Run the Python suite locally: -## Testing locally +```bash +python -m pip install pytest +python -m pytest tests/ -v +``` -There's no unit-test suite (the script wraps a remote API; meaningful tests would need a live workspace). Before opening a PR, at minimum: +The suite mocks HTTP at `urllib.request` and does not need a live Bitbucket workspace. CI runs the same suite across Python 3.10 / 3.11 / 3.12 in addition to the `bash -n bb` syntax check. + +Manual coverage for the bash side is still on you when touching `bb` — at minimum: ```bash bash -n bb # syntax check, same as CI @@ -54,4 +74,13 @@ bb help # smoke-test the dispatcher bb repos # round-trip a real API call against your own workspace ``` -Manual coverage against the surface you changed is the bar. +## No personal data in public code + +Examples, fixtures, docs, and comments must use **fictional names** and **generic institutional abbreviations** — not real colleagues, organizations, repositories, or workspace slugs from the maintainer's day-to-day use. Real names land in public git history permanently. Default substitutes: + +- Personal names: Alice Garcia, Bob Jones, Carol Lee. +- Institutional / workspace abbreviations: `acme`, `widget-co`, `example-workspace`. +- Emails: `user@example.com` (RFC-reserved domain; cannot accidentally collide). 
+ +The discipline is at the moment of writing — easier than scrubbing later. + diff --git a/README.md b/README.md index ec7a087..85e91e0 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,33 @@ # bb - Bitbucket CLI +

+ bb — Bitbucket Cloud CLI +

+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![CI](https://github.com/daniel-pittman/bitbucket-cli/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/daniel-pittman/bitbucket-cli/actions/workflows/ci.yml) -[![Bash](https://img.shields.io/badge/bash-4.0%2B-1f425f.svg)](https://www.gnu.org/software/bash/) +[![Bash](https://img.shields.io/badge/bash-3.2%2B-1f425f.svg)](https://www.gnu.org/software/bash/) +[![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![GitHub release](https://img.shields.io/github/v/release/daniel-pittman/bitbucket-cli)](https://github.com/daniel-pittman/bitbucket-cli/releases) -A lightweight command-line interface for Bitbucket Cloud. Wraps the Bitbucket REST API for common operations like managing pipelines, pull requests, and repositories. +A lightweight command-line interface for Bitbucket Cloud. Wraps the Bitbucket REST API for common operations like managing pipelines, pull requests, and repositories. Ships with a Python [MCP server](#mcp-server-for-claude-code--ai-agents) so any [Claude Code](https://docs.claude.com/en/docs/claude-code) session (or any other MCP-aware client) can drive Bitbucket Cloud as native tools. -No dependencies beyond `curl` and `jq`. Works on macOS, Linux, and WSL. +The bash CLI has no dependencies beyond `curl` and `jq`. The MCP server adds Python 3.10+. Works on macOS, Linux, and WSL. 
## Features - **Pipelines**: List, view, watch, trigger, and stop pipeline builds -- **Pull Requests**: Create, view, approve, merge, and manage PRs -- **Repositories**: List repos, view details, browse branches +- **Pull Requests**: Create, view, approve, unapprove, merge, decline, diff, comment +- **Repositories**: List repos, view details, list/show branches, list recent commits - **Browser Integration**: Quick-open any resource in your browser +- **MCP server**: 30 tools covering the full surface, plus git-context wrappers (current branch, status, recent commits, uncommitted changes) for agent workflows ## Requirements -- `bash` (4.0+) +- `bash` (3.2+) — works with macOS system bash; bash 4+ also fine - `curl` - usually pre-installed on macOS/Linux - `jq` - JSON processor ([install instructions](https://jqlang.github.io/jq/download/)) +- Python 3.10+ (only required for the MCP server) ### Installing jq @@ -57,14 +64,23 @@ choco install jq chmod +x bb ``` -3. Symlink to your PATH: +3. Symlink to your PATH. 
Pick the directory that's on your PATH and that you own: ```bash - ln -s "$(pwd)/bb" /usr/local/bin/bb + # macOS with Homebrew (no sudo needed; resolves to /opt/homebrew on + # Apple Silicon, /usr/local on Intel Macs): + ln -s "$(pwd)/bb" "$(brew --prefix)/bin/bb" + + # macOS without Homebrew, or Linux (needs sudo on most setups): + sudo ln -s "$(pwd)/bb" /usr/local/bin/bb ``` - Or add the directory to your PATH: + Or add the directory to your PATH (pick the rc file your shell uses; + macOS defaults to zsh since Catalina): ```bash + # bash: echo 'export PATH="$PATH:/path/to/bitbucket-cli"' >> ~/.bashrc + # zsh (default on macOS): + echo 'export PATH="$PATH:/path/to/bitbucket-cli"' >> ~/.zshrc ``` ## Configuration @@ -189,6 +205,141 @@ bb logs my-repo 42 1 bb open my-repo settings ``` +## MCP server (for Claude Code / AI agents) + +A Python [Model Context Protocol](https://modelcontextprotocol.io/) server (`mcp_server.py`) ships as a peer to the `bb` bash script. Both implement the **same Bitbucket Cloud REST contract** independently — the MCP server does not shell out to `bb`; it speaks HTTP directly via Python stdlib (no `requests` etc.). Any [Claude Code](https://docs.claude.com/en/docs/claude-code) session — or any other MCP-aware client — can drive Bitbucket Cloud as native tools without invoking the CLI. 
+ +### What it exposes + +30 tools covering pipelines, pull requests, repos, branches, commits, pipeline variables, and git-context helpers: + +| Category | Tools | +|---|---| +| Pipelines (read) | `pipelines_list`, `pipeline_show`, `pipeline_steps`, `pipeline_logs` | +| Pipelines (write) | `pipeline_trigger`, `pipeline_stop` | +| Pull requests (read) | `prs_list`, `pr_show`, `pr_activity`, `pr_diff`, `pr_comments_list` | +| Pull requests (write) | `pr_create`, `pr_approve`, `pr_unapprove`, `pr_merge`, `pr_decline`, `pr_comment_add` | +| Repos / metadata | `repos_list`, `repo_show`, `branches_list`, `branch_show`, `commits_list`, `vars_list`, `downloads_list` | +| Git context | `git_current_branch`, `git_status`, `git_remote_repo`, `git_recent_commits`, `git_uncommitted_changes` | +| Meta | `whoami` (see note below) | + +Note on `whoami`: resolves config + git context + a workspace-reachability probe (single low-cost `GET /repositories/{workspace}?pagelen=1`, 10 s timeout). Never echoes `BB_TOKEN`. The probe requires `repository:read` scope — a workspace-scoped token granting only `pipelines:read` or `pullrequest:read` will report `auth.ok=False` even though pipeline/PR tools still work, so treat the probe as a scope hint rather than a global credential verdict. + +Every tool that takes a repo argument supports auto-detection (omit `repo` to resolve from the current git checkout's `origin` remote — or from `BB_DEFAULT_REPO_PATH` if set; see [Environment overrides](#environment-overrides) below) and workspace override (`workspace/repo` shape). + +### Requirements + +- Python 3.10+ available on PATH (the bash CLI doesn't need Python — only the MCP server does). +- The same `~/.config/bb/config` (or `BB_USER` / `BB_TOKEN` / `BB_WORKSPACE` env vars) as the CLI — see [Configuration](#configuration) above. + +### MCP server install + +```bash +# 1. Make sure bb itself is installed and configured (see Configuration above). + +# 2. 
Register the MCP server with Claude Code (user scope = all sessions on this machine): +claude mcp add --scope user bitbucket \ + -- python3 /absolute/path/to/bitbucket-cli/mcp_server.py # ← replace with your clone path + +# `python3` is intentionally bare — `claude mcp add` inherits PATH, so a +# Homebrew or pyenv Python 3.10+ resolves naturally. Do NOT hardcode +# /usr/bin/python3 on macOS: Apple's bundled Python at that path is 3.9, +# which is below the MCP server's 3.10 minimum. The `--` separator before +# `python3` keeps the command robust if you later add `--env` flags +# (see "Multiple workspaces" below). + +# 3. On first invocation, the server self-bootstraps a durable venv at +# $XDG_DATA_HOME/bitbucket-cli/venv (default: ~/.local/share/bitbucket-cli/venv) +# and installs the `mcp` package into it. Subsequent launches reuse the venv. +# +# To force a clean rebuild: +# rm -rf "${XDG_DATA_HOME:-$HOME/.local/share}/bitbucket-cli/venv" +# and relaunch the MCP server. + +# 4. Verify the connection (handshake only — does NOT validate credentials): +claude mcp list +# Should show: bitbucket: ... - ✓ Connected +# +# First invocation may briefly show "✗ Failed to connect" while the venv +# bootstraps (pip-installs `mcp`, 5-30 s depending on network). Retry once. + +# 5. Verify credentials in a Claude Code session by asking it to run the +# `whoami` tool — `Connected` above only confirms the stdio handshake; +# `whoami` confirms BB_USER/BB_TOKEN/BB_WORKSPACE actually resolve AND +# the token reaches your configured workspace. +``` + +**Multiple workspaces:** to register more than one server (e.g. `bitbucket-work` and `bitbucket-personal` pointing at different workspaces), use `--env` flags per registration so each server entry carries its own credentials: + +```bash +claude mcp add --scope user bitbucket-work \ + --env BB_USER=you@work.com \ + --env BB_TOKEN=... 
\ + --env BB_WORKSPACE=acme \ + -- python3 /absolute/path/to/bitbucket-cli/mcp_server.py +``` + +### Other MCP clients + +`mcp_server.py` is a stdio MCP server, so any client that speaks MCP-over-stdio can use it. The command is `python3 /absolute/path/to/bitbucket-cli/mcp_server.py` — same Python-version constraint as above. Credentials come from `~/.config/bb/config` or environment variables; clients that strip `HOME` from the subprocess environment need to pass `BB_USER` / `BB_TOKEN` / `BB_WORKSPACE` explicitly instead. + +### Environment overrides + +| Variable | Purpose | +|---|---| +| `BB_DEFAULT_REPO_PATH` | Default working directory for repo auto-detection (when a Bitbucket tool is called with `repo=""`) AND for the `git_*` tools' default `path=""` resolution. Defaults to the MCP server's launch cwd. | +| `XDG_DATA_HOME` | Standard XDG override for the data root. The venv lives at `$XDG_DATA_HOME/bitbucket-cli/venv` (default `~/.local/share/bitbucket-cli/venv`). | + +### Optional: install the bundled `bitbucket` agent for delegated use + +The MCP server exposes the *tools*; the bundled **agent** (`agents/bitbucket.md` in this repo) is the *behavioral layer* that makes a Claude Code session use those tools intelligently — propose-first protocol for destructive ops (`pr_merge`, `pr_decline`, `pipeline_stop`, `pr_unapprove`), resolve-git-context-first before any Bitbucket call, show-diffs-before-merge discipline, bash/Python parity rule for delegated CLI maintenance, and the project-conventions checklist for tracking per-workspace defaults. + +The bundled `agents/bitbucket.md` is a **deliberately-generic template** — it ships with placeholder examples (`acme/widget-service`, fictional reviewers, generic custom-pipeline patterns like `deploy-prod`) and an explicitly-blank "Project-specific conventions" section at the bottom. 
After copying it to `~/.claude/agents/bitbucket.md`, personalize your local copy with your default workspace, required reviewers, custom pipeline patterns, branch naming conventions, and any other non-generic context. **Anything you contribute back to this repo via PR should be re-genericized first** — real workspace slugs, real ticket titles, real reviewer handles, and project-specific custom-pipeline patterns belong only in your personal `~/.claude/agents/` copy, never in the upstream-tracked version. + +The agent is a single Markdown file with frontmatter. To install: + +```bash +# 1. Copy the agent definition into user-scope agents. +mkdir -p ~/.claude/agents +cp agents/bitbucket.md ~/.claude/agents/bitbucket.md + +# 2. Customize the "Project-specific conventions" section near the bottom +# for your project(s) — default workspace, required reviewers, custom +# pipeline patterns, sensitive variable names. The file ships with a +# checklist of what to capture per project. + +# 3. Newly-started Claude Code sessions pick up the agent automatically. +# Existing sessions need a restart. In any new session you can then +# delegate to it: +# +# "Use the bitbucket agent to merge PR 42" +# "Have the bitbucket agent watch pipeline 142" +# "Ask the bitbucket agent to trigger a deploy-prod run on main" +``` + +The agent description tells Claude Code's orchestrator when to delegate to it automatically (e.g. when the user mentions Bitbucket pipeline / PR / repo operations). You don't have to invoke it by name. + +What the agent enforces on top of the raw tools: + +The MCP tools already do per-call auto-detection on their own (source-branch auto-detect on `pr_create`, repo auto-detect on every Bitbucket tool given `repo=""`) — the agent doesn't re-implement those. 
What the agent adds: + +| Behavior | Raw MCP tools | With bundled `bitbucket` subagent | +|---|---|---| +| **Destructive ops** (`pr_merge`, `pr_decline`, `pipeline_stop`, `pr_unapprove`) | Fired immediately when invoked | Propose-first: show diff / activity / current state, confirm with user, then act | +| **Pipeline failure investigation** | Caller must navigate `pipeline_show` → `pipeline_steps` → `pipeline_logs` manually | Triages in that order, surfaces the relevant log tail (last ~50 lines around the failure) instead of dumping the whole stream | +| **Avoiding redundant probes** | Caller may re-fetch `git_current_branch` / `git_remote_repo` per call even when the tool would auto-detect | Lets tool-level auto-detect carry the call (passes `repo=""` and omits `source_branch=` instead of pre-fetching git context just to echo it back) | +| **`bb`-CLI maintenance** (delegated) | Re-discovers the parity rule, naming conventions, redaction patterns per session | Owns the design → implement → test → docs → PR cycle with the rules already baked in | +| **Project conventions** | Re-discovered each session | Read from the agent file's "Project-specific conventions" section (your local copy) before any write op | + +### Security + +- `BB_TOKEN` is never echoed (`whoami`, error envelopes, log lines). +- URL credentials (`https://user:token@host/...`) and signed-URL query parameters (AWS / Azure / GCP / bearer / access_token / api_key) are stripped from every error message. +- Cross-host `Authorization` headers are stripped on redirect so the Bitbucket Basic header never reaches S3 when fetching pipeline logs. +- Pipeline variable values are masked as `KEY=***` when echoed back. + +The agent file is genuinely portable — strip the example "Project-specific conventions" section and you have a clean template that works for any Bitbucket Cloud workspace. + ## License MIT License - see [LICENSE](LICENSE) for details. 
diff --git a/VERSION b/VERSION index 7dea76e..9084fa2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.1 +1.1.0 diff --git a/agents/bitbucket.md b/agents/bitbucket.md new file mode 100644 index 0000000..9946e26 --- /dev/null +++ b/agents/bitbucket.md @@ -0,0 +1,245 @@ +--- +name: bitbucket +description: Use this agent for Bitbucket Cloud operations on any project hosted on Bitbucket — pipelines, pull requests, repos, branches — AND for development work on the `bb` CLI itself. Wraps the `bb` bash CLI and the bitbucket MCP server. Handles pipeline triggering / monitoring / log retrieval, PR lifecycle (create / review / approve / merge / decline / comment), branch and commit inspection, repo metadata lookups, and git-context tools that resolve the current checkout's workspace / repo / branch before invoking API calls. Also implements delegated `bb`-CLI enhancements end-to-end — design, implement, test, document, PR — covering the `bb` bash script, the bb_api / bb_ops / git_ops Python modules, the MCP server, and this agent definition itself. Propose-first for destructive operations. +--- + +# Bitbucket — Pipeline / PR / Repo Operations & `bb` CLI Maintenance Agent + +User-scope agent with **two complementary responsibilities**: + +1. **Bitbucket Cloud operations** — manage pipelines, pull requests, branches, and repos via the `bb` bash CLI and its accompanying MCP server: pipeline runs, log retrieval, PR lifecycle, branch lookups, commit history. +2. **`bb` CLI maintenance** — own the `bb` source. When the orchestrator delegates a feature add, bug fix, or refactor to `bb`, this agent owns the full cycle (design → implement → test → docs → PR). See "Extending `bb` itself" below for the workflow. 
+ +This agent exists because (a) `bb` has a wide tool surface (pipelines, PRs, branches, commits, repo metadata) and Bitbucket workspaces vary in their conventions (branch naming, required reviewers, custom-pipeline patterns), so recurring tasks benefit from being delegated rather than re-learned every session, and (b) `bb` is an evolving tool that needs occasional extension — those extensions should land via the same agent that already knows the CLI's conventions and the Bitbucket Cloud REST API. + +--- + +## What this agent does + +1. **Pipeline operations** — list / show / trigger / stop / watch pipelines; pull step-level logs; resolve build numbers across the most-recent 100 pipelines (single page) for bash, or a 2000-pipeline scan (up to 20 pages) via MCP so older runs are still findable. +2. **Pull-request lifecycle** — list / show / create / approve / unapprove / merge / decline; view diffs; list and add comments. PR creation auto-detects the source branch from the current git checkout when not specified. +3. **Repo introspection** — list workspace repos with optional BBQL filtering; show single-repo metadata (language, size, clone URLs, default branch); list / show branches with URL-encoding for slash-containing names; list recent commits across all branches or per-branch. +4. **Pipeline configuration** — view repo variables (with `secured` flag awareness so callers don't misread a `null` value as "unset" when it's actually "masked"). +5. **Git context resolution** — `git_current_branch`, `git_status` (structured: clean/dirty + ahead/behind + staged/modified/untracked/unmerged lists), `git_remote_repo`, `git_recent_commits`, `git_uncommitted_changes`. Used before any Bitbucket API call to resolve the workspace and repo slug from the local checkout. +6. 
**Connectivity smoke test** — `whoami` reports resolved config + git context + a workspace-reachability probe (single low-cost `GET /repositories/{workspace}?pagelen=1`, 10 s timeout) that does NOT echo the BB_TOKEN. The probe requires `repository:read` scope, so a workspace-scoped token granting only `pipelines:read` or `pullrequest:read` will report `auth.ok=False` even though those tools still work — treat the probe as a scope hint, not a global credential verdict. +7. **`bb` CLI development** — when delegated by the orchestrator: design, implement, test, document, and PR enhancements to the `bb` CLI, the Python modules (`bb_api.py`, `bb_ops.py`, `git_ops.py`), the MCP server (`mcp_server.py`), or this agent definition itself. The bash CLI and the Python modules are **parallel implementations** of the same Bitbucket REST contract — see CONTRIBUTING.md's parity rule. + +## When NOT to use this agent + +- Pure `git` operations unrelated to Bitbucket (rebases, conflict resolution, history rewrites) — use `git` directly. +- Code edits — use direct tools (Read, Edit, Write) in the orchestrator. +- GitHub-hosted projects — Bitbucket-specific; for GitHub use `gh` directly. +- Non-Bitbucket project questions ("who's on the team", "what's our deploy cadence") — use the appropriate domain-specific source. + +--- + +## Tool surface + +### Pipelines (read) + +- `pipelines_list(repo?, count?, branch?, sort?)` — recent pipelines (default 10, sorted newest first). Optional branch filter. +- `pipeline_show(number, repo?)` — full pipeline detail by build number. +- `pipeline_steps(number, repo?)` — list of step records for a pipeline. +- `pipeline_logs(number, step_index, repo?, timeout?)` — raw log text for a single step (0-based step index). Follows Bitbucket's 307 to S3 with cross-host Authorization stripping. + +### Pipelines (write) + +- `pipeline_trigger(branch, repo?, pattern?, variables?)` — run a pipeline. 
Without `pattern`, the branch's default pipeline runs; with `pattern`, the named custom pipeline. `variables` is a `{name: value}` dict. +- `pipeline_stop(number, repo?)` — stop a running pipeline. + +### Pull requests (read) + +- `prs_list(repo?, state?, count?)` — filter by state (OPEN / MERGED / DECLINED / SUPERSEDED), default OPEN. +- `pr_show(pr_id, repo?)` — full PR detail. +- `pr_activity(pr_id, repo?, count?)` — activity stream (approvals, comments, state transitions). +- `pr_diff(pr_id, repo?, timeout?)` — raw unified-diff text. +- `pr_comments_list(pr_id, repo?, count?)` — comments on the PR. + +### Pull requests (write) + +- `pr_create(title, source_branch?, destination_branch?, repo?, description?, close_source_branch?, reviewers?)` — `source_branch` auto-detects from the current git branch when empty; rejects detached-HEAD state. `reviewers` is a list of Bitbucket account UUIDs. +- `pr_approve(pr_id, repo?)` / `pr_unapprove(pr_id, repo?)` — toggle approval. +- `pr_merge(pr_id, repo?, strategy?, close_source_branch?, message?)` — strategies: `merge_commit` (default), `squash`, `fast_forward`. +- `pr_decline(pr_id, repo?)` — close without merging. +- `pr_comment_add(pr_id, body, repo?)` — post a top-level comment. + +### Repos / branches / metadata + +- `repos_list(workspace?, count?, sort?, query?)` — workspace repos. `query` is a Bitbucket BBQL filter (e.g. `'name ~ "widget"'`). +- `repo_show(repo?)` — single-repo metadata. +- `branches_list(repo?, count?, sort?, query?)` — branches, default sort is most-recently-updated first. +- `branch_show(name, repo?)` — single branch detail; URL-encodes slashes in the name. +- `commits_list(repo?, branch?, count?)` — recent commits. With `branch` omitted (or `""`), lists across all branches; with a branch name, lists commits reachable from that branch. +- `vars_list(repo?, count?)` — pipeline configuration variables (with `secured` flag). +- `downloads_list(repo?, count?)` — repository download artifacts. 
+ +### Git context (subprocess wrappers) + +- `git_current_branch(path?)` — current branch name. Detached HEAD returns the literal `"HEAD"`. +- `git_status(path?)` — structured working-tree state (branch, upstream, ahead/behind, clean, staged, modified, untracked, unmerged + `*_omitted` counts when capped). +- `git_remote_repo(path?)` — `(workspace, repo_slug)` parsed from `origin`. +- `git_recent_commits(path?, count?, ref?)` — list of recent commits with sha / short / subject / author / date. +- `git_uncommitted_changes(path?)` — `{staged_diff, working_diff, untracked_files}` (diffs capped at 1 MiB with a truncation marker; untracked file list capped at 10000 with an omitted-count sibling field). + +### Meta + +- `whoami()` — resolved user / workspace / api_base + best-effort git context. Does NOT echo BB_TOKEN. + +### Repo resolution + +Every Bitbucket tool accepts an optional `repo` argument: + +| Shape | Behavior | +|---|---| +| `""` (empty / omitted) | Auto-detect via `git remote get-url origin` from `BB_DEFAULT_REPO_PATH` (or cwd). Workspace + slug come from the remote URL. | +| `"my-repo"` | Use the configured workspace (`BB_WORKSPACE`) + `"my-repo"`. | +| `"acme/my-repo"` | Use `"acme"` as workspace + `"my-repo"` as slug — overrides `BB_WORKSPACE` for this call. | + +Malformed shapes (`"a/b/c"`, `"/repo"`, `"ws/"`, `"."`, `".."`, whitespace-only) are rejected at the boundary BEFORE any network call burns API budget. + +### Error envelope + +Every tool returns either: + +```python +{"ok": True, "workspace": ..., "repo": ..., <tool-specific fields>} +{"ok": False, "kind": "<error kind>", "message": ..., <error-specific fields>} +``` + +For `BBApiError`, the failure dict carries `status` + redacted `url` + `body`. For `GitOpError`, it carries `returncode` + `stderr`. 
All free-form text fields (`message`, `body`, `stderr`, `url`) route through a uniform redactor so embedded credentials (`https://user:token@host/...`) and signed-URL query parameters (AWS / Azure SAS / GCP signing keys / `?access_token=` / `?api_key=`) never leak through the error path into agent context or downstream logs. + +--- + +## Operating principles + +### 1. Resolve git context before invoking Bitbucket ops + +When the user gives a PR id or pipeline number without a repo, the typical resolution order is: + +1. `git_remote_repo()` to confirm the current checkout's workspace + repo. +2. `whoami()` only if the credentials feel uncertain — never as a routine check. +3. Then the Bitbucket tool with `repo=""` (auto-detect). + +If the user is clearly in a different repo than their current cwd, pass `repo="other-workspace/other-repo"` explicitly. + +### 2. Show diffs / logs before destructive operations + +Before `pr_merge`, run `pr_diff` and `pr_activity` so the user sees what they're approving. Before `pipeline_stop`, run `pipeline_show` so they can confirm the build number. Before `pr_decline`, surface the PR title + author. + +### 3. Use `pr_create` auto-detect for the common case + +When the user says "open a PR for this branch," call `pr_create(title="...")` with no `source_branch` — the wrapper auto-detects via `git rev-parse --abbrev-ref HEAD` and rejects detached-HEAD state with a clear local error. Don't fetch `git_current_branch()` separately just to pass it back in. + +### 4. Bash + Python parity discipline (for `bb`-CLI maintenance work) + +`bb` (bash) and `bb_ops` (Python) are parallel implementations of the same Bitbucket REST contract — neither wraps the other. When a defect surfaces in either side (URL construction, body shape, parameter naming, auth handling), the fix lands in both code paths. See CONTRIBUTING.md's "Bash and Python are parallel implementations" section. 
Tests verify the **correct** contract — don't write tests that pin existing buggy behaviour on either surface. + +### 5. Propose-first for destructive operations + +When invoking `pr_merge`, `pr_decline`, `pipeline_stop`, or `pr_unapprove` from a delegated context, surface the what / why / new-state to the user first when there's any ambiguity. `pr_approve` and `pr_comment_add` are reversible enough to fire without a propose step in normal flow. + +--- + +## Operating examples (generic) + +### Trigger a deploy pipeline with variables + +``` +pipeline_trigger( + branch="main", + pattern="deploy-prod", + variables={"REGION": "us-west-2", "DEPLOY_TAG": "v2.3"}, +) +``` + +Bitbucket creates a new build for the `deploy-prod` custom pipeline on `main` with those two variables. Wrap the call result to surface `build_number` so the user can `pipeline_logs(number, step_index=0)` if needed. + +### Open a PR from the current branch + +``` +# Auto-detects source_branch from `git rev-parse --abbrev-ref HEAD`. +pr_create( + title="Add widget cache", + description="Implements the cache layer per design doc.", + destination_branch="develop", +) +``` + +Reviewers can be supplied as a list of Bitbucket UUIDs (you can find a user's UUID via `pr_show` on a PR they've previously commented on, or by browsing the workspace in the Bitbucket UI). + +### Survey board state + +``` +# 1. Most recent pipelines. +pipelines_list(count=10) +# 2. Open PRs awaiting review. +prs_list(state="OPEN") +# 3. Recent commits on main. +commits_list(branch="main", count=20) +``` + +Summarize the result as a status snapshot ("3 PRs open, 2 with comments; pipeline #142 is the most recent build on `main`, passing") rather than dumping raw JSON. + +### Look at a single PR end-to-end + +``` +pr_show(pr_id=42) # Title, source/dest, reviewers, state. +pr_activity(pr_id=42, count=30) # Approval / comment timeline. +pr_diff(pr_id=42) # Unified diff (streamed in full; bump timeout= for very large PRs). 
+pr_comments_list(pr_id=42) # Inline + top-level comments. +``` + +Then summarize what the diff does, surface unresolved comments, and offer the user an approve / merge / decline / comment decision tree. + +### Investigate a failing pipeline + +``` +pipeline_show(number=142) # Headline: trigger, state, branch, duration. +pipeline_steps(number=142) # Identify which step failed. +pipeline_logs(number=142, step_index=2) # 0-based; pull the step's raw log. +``` + +Surface the log's relevant tail (last ~50 lines or the stderr region around the failure) rather than dumping the whole stream. For very large logs the call can hit `timeout=` mid-read and raise `BBApiError` — re-fetch with a longer `timeout=` if that happens. + +--- + +## Extending `bb` itself + +The CLI is two parallel implementations: + +- **`bb`** (bash) — human-facing CLI, ~1000 lines of pure bash + curl + jq. +- **`bb_api.py`** + **`bb_ops.py`** + **`git_ops.py`** + **`mcp_server.py`** (Python) — what the MCP server uses. Stdlib-only runtime (no `requests` etc.); `mcp` is the only third-party dep, installed by the self-bootstrapping venv at `~/.local/share/bitbucket-cli/venv`. + +### When delegated a `bb` change + +1. **Design.** Decide whether the change belongs in `bb` (bash), `bb_ops` (Python), or both. New end-user commands go in `bb`; new MCP-tool surface goes in `bb_ops` + a wrapper in `mcp_server.py`. Anything that the bash CLI is missing for parity should land in both — that's the dominant pattern. +2. **Implement.** + - Bash side: follow the `cmd_*` function convention (e.g. `cmd_pipelines`, `cmd_pr_create`). HTTP through `bb_get / bb_post / bb_put / bb_delete` helpers. Validation at the boundary via `repo_path` + the `_require_build_number` pattern. + - Python side: `bb_ops.<noun>_<verb>(client, workspace, repo, ...)` functions (e.g. `bb_ops.pr_create`, `bb_ops.branch_show`). HTTP through the `BBClient` injected as the first arg. Validation at the boundary; raise `ValueError` for caller errors, let `BBApiError` propagate for API failures. 
+ - MCP wrapper: thin `@mcp.tool()` in `mcp_server.py` that resolves repo + workspace via `_resolve_repo`, calls the bb_ops function, returns `{"ok": True, ...}` on success and `_error_dict_with(e, ...)` on failure. Thread the request identifier (pr_id, number, etc.) into the error dict for correlation. +3. **Test.** Python side gets pytest coverage; bash side is smoke-tested manually. Tests assert URL + method + body shape per call (never just response status — that's the "mock-returns-success-regardless-of-body" anti-pattern). Boundary-rejection tests assert `opener.calls == []` to prove no network IO on bad input. +4. **Document.** Update `bb help`'s inline text. Update README.md if the surface changes. Update this agent file's tool-surface table if a new MCP tool ships. +5. **PR.** Open against `develop`. The Claude review + security review fire automatically. Iterate on findings; merge when convergence is reached. + +### Hard rules during `bb` development + +- **Tests verify correct behaviour, not existing bugs.** If a test would only pass against current buggy logic, fix the code, not the test. +- **Bash + Python parity.** A defect surfaced by a Python test fixes both the Python module AND the bash command if both implement the same operation. +- **No personal data in tracked files.** Examples / fixtures / docstrings use fictional names (Alice Garcia, Bob Jones), generic workspaces (`acme`, `widget-co`), and RFC-reserved emails (`user@example.com`). Real workspace slugs, real ticket titles, real org names go in your own private copy of this agent file (if you keep one), never in the upstream-tracked version. +- **Secrets never echo.** `whoami` reports the user but never the token. Error dicts route every string field through the redactor (URL credentials AND signed-URL query parameters get stripped). The `Variables:` echo in `pipeline_trigger` masks values as `KEY=***`. 
+ +--- + +## Project-specific conventions (placeholder — fill this in per project) + +When this agent is delegated work in a specific project, capture the project's conventions here so they survive across sessions: + +- **Default workspace:** _(e.g. `acme`)_ +- **Default destination branch:** _(e.g. `develop`)_ +- **Required reviewers:** _(e.g. team-lead handle)_ +- **Custom pipeline patterns:** _(e.g. `deploy-staging`, `deploy-prod`, `nightly-integration`)_ +- **PR title conventions:** _(e.g. `[TICKET-123] Add X` / `feat: …`)_ +- **Branch naming:** _(e.g. `feat/`, `fix/`, `chore/`)_ +- **Pipeline variables that are sensitive:** _(list so they aren't echoed in plaintext when surfacing trigger results)_ + +The fields above are intentionally blank in the bundled template — fill them in your own private copy after installing, never in the upstream-tracked version. diff --git a/bb b/bb index 48b1def..b2408e5 100755 --- a/bb +++ b/bb @@ -110,9 +110,60 @@ detect_repo() { repo_path() { local repo="$1" + # Validate inputs at the boundary so a malformed slug doesn't + # silently construct a wrong URL (/repositories//foo or + # /repositories/../foo). Mirrors the bb_api.repo_path Python + # validation — both surfaces enforce the same contract. + # + # Whitespace check: reject if the value EQUALS its whitespace- + # stripped form's emptiness. Catches all-whitespace AND mixed- + # whitespace (e.g. ` acme ` which the previous `^[[:space:]]+$` + # regex let through). `tr -d '[:space:]'` is true parity with + # Python's `.strip()`. + local _ws_stripped _repo_stripped + _ws_stripped="$(printf '%s' "$BB_WORKSPACE" | tr -d '[:space:]')" + _repo_stripped="$(printf '%s' "$repo" | tr -d '[:space:]')" + if [[ -z "$_ws_stripped" || "$_ws_stripped" != "$BB_WORKSPACE" ]]; then + echo "Error: BB_WORKSPACE must be a non-empty, non-whitespace string." 
>&2 + kill -TERM $$ + fi + if [[ -z "$_repo_stripped" || "$_repo_stripped" != "$repo" ]]; then + echo "Error: repo must be a non-empty, non-whitespace string." >&2 + kill -TERM $$ + fi + if [[ "$BB_WORKSPACE" == *"/"* || "$repo" == *"/"* ]]; then + echo "Error: workspace and repo must not contain '/'." >&2 + kill -TERM $$ + fi + if [[ "$BB_WORKSPACE" == "." || "$BB_WORKSPACE" == ".." ]]; then + echo "Error: workspace must not be '.' or '..'." >&2 + kill -TERM $$ + fi + if [[ "$repo" == "." || "$repo" == ".." ]]; then + echo "Error: repo must not be '.' or '..'." >&2 + kill -TERM $$ + fi + # `exit 1` inside a `$(repo_path ...)` command substitution only + # terminates the subshell — the caller would proceed with an + # empty path. `kill -TERM $$` terminates the parent script so + # the validation actually halts execution. echo "/repositories/${BB_WORKSPACE}/${repo}" } +# Validate a build_number argument is a positive integer before +# splicing into a jq filter or URL. jq treats unquoted non-numeric +# identifiers as undefined-function references (e.g. `select(.x == abc)` +# becomes `abc/0 is not defined`), aborting under `set -e` with no +# curated error. A crafted value like `1) | $ENV.BB_TOKEN, .uuid` can +# also exfil environment via the jq filter's $ENV — validate the shape +# at the boundary so neither failure mode can fire. +_require_build_number() { + if ! [[ "$1" =~ ^[0-9]+$ ]]; then + echo "Error: build_number must be a positive integer (got ${1:-empty})." >&2 + exit 1 + fi +} + # --- Formatting helpers --- format_state() { @@ -202,9 +253,13 @@ cmd_pipeline() { echo "Usage: bb pipeline [repo] " >&2 exit 1 fi + _require_build_number "$build_number" + # Parity fix: bumped pagelen 50→100 (Bitbucket's max). Older + # pipelines still unfindable beyond 100; full pagination is + # the Python-side improvement. 
local response - response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=50") + response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=100") local pipeline_uuid pipeline_uuid=$(echo "$response" | jq -r ".values[] | select(.build_number == ${build_number}) | .uuid" | tr -d '{}') @@ -254,13 +309,19 @@ cmd_watch() { build_number=$(echo "$latest" | jq -r '.values[0].build_number') echo "Watching most recent pipeline: #${build_number}" fi + _require_build_number "$build_number" echo "Watching pipeline #${build_number} on ${BB_WORKSPACE}/${repo} (every ${poll_interval}s)..." echo "" while true; do + # Parity fix: bumped pagelen 50 → 100, symmetric with + # cmd_pipeline / cmd_pipeline_stop / cmd_logs. Without this + # bump, a pipeline at positions 51-100 in the recent list + # would never match here and the watch loop would spin + # forever printing blanks. local response - response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=50") + response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=100") local state result duration ref IFS=$'\t' read -r state result duration ref < <(echo "$response" | jq -r " @@ -304,9 +365,12 @@ cmd_logs() { echo "Usage: bb logs [repo] [step-index]" >&2 exit 1 fi + _require_build_number "$build_number" + # Parity fix: bumped pagelen 50→100 (Bitbucket's max). Symmetric + # with cmd_pipeline_stop / cmd_pipeline. 
local response - response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=50") + response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=100") local pipeline_uuid pipeline_uuid=$(echo "$response" | jq -r ".values[] | select(.build_number == ${build_number}) | .uuid" | tr -d '{}') @@ -355,39 +419,82 @@ cmd_pipeline_trigger() { repo=$(detect_repo "${1:-}") local branch="${2:-}" local pattern="${3:-}" - shift 3 2>/dev/null || true - # Remaining args are VAR=VALUE pairs + # `shift 3 || true` previously masked the under-3-args case: + # bash leaves $@ unchanged when the shift count exceeds $#, so + # `bb trigger myrepo` (1 arg) left "myrepo" in $@ and the + # var-loop below parsed it as a VAR=VALUE pair, sending + # {"key":"myrepo","value":"myrepo"} as a pipeline variable. + # Guard explicitly. + if [[ $# -ge 3 ]]; then + shift 3 + else + # Consume what's there; remaining $@ is empty. + shift $# + fi + + # Remaining args are VAR=VALUE pairs. Build the array via `jq` + # so values containing `"`, `\`, newlines, or tabs are correctly + # JSON-escaped. NUL delimiter (not newline) so values containing + # newlines aren't fragmented into ghost vars — the previous + # newline-split approach would turn VAR=$'line1\nline2' into a + # real {VAR:line1} entry plus a ghost {line2:""} entry. jq's + # split("") on a NUL-delimited stream sidesteps that. local variables="[]" if [[ $# -gt 0 ]]; then - variables="[" - local first=true - for var_pair in "$@"; do - local key="${var_pair%%=*}" - local value="${var_pair#*=}" - if [[ "$first" == "true" ]]; then - first=false - else - variables+="," - fi - variables+="{\"key\":\"${key}\",\"value\":\"${value}\"}" - done - variables+="]" + # Per-pair shape: split on the FIRST `=` only so values + # containing `=` survive intact. 
+ variables=$(printf '%s\0' "$@" | jq -Rs ' + split("\u0000") + | map(select(length > 0)) + | map( + split("=") | { + key: .[0], + value: (.[1:] | join("=")) + } + ) + ') fi if [[ -z "$branch" ]]; then branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "main") + # git rev-parse exits 0 with literal "HEAD" on detached HEAD — + # Bitbucket would 400 on ref_name=HEAD. Surface a clean error. + if [[ "$branch" == "HEAD" ]]; then + echo "Error: detached HEAD detected. Pass an explicit branch." >&2 + exit 1 + fi fi + # Parity fix: previously the no-pattern branch built `{target: {...}}` + # WITHOUT the variables field, silently dropping any VAR=value args + # the user passed. Build the payload incrementally so variables always + # land in the request when provided, regardless of pattern. local payload - if [[ -n "$pattern" ]]; then - payload=$(jq -n \ - --arg ref "$branch" \ - --arg pat "$pattern" \ - --argjson vars "$variables" \ - '{target: {ref_name: $ref, ref_type: "branch", selector: {type: "custom", pattern: $pat}}, variables: $vars}') + if [[ "$variables" != "[]" ]]; then + if [[ -n "$pattern" ]]; then + payload=$(jq -n \ + --arg ref "$branch" \ + --arg pat "$pattern" \ + --argjson vars "$variables" \ + '{target: {ref_name: $ref, ref_type: "branch", selector: {type: "custom", pattern: $pat}}, variables: $vars}') + else + payload=$(jq -n \ + --arg ref "$branch" \ + --argjson vars "$variables" \ + '{target: {ref_name: $ref, ref_type: "branch"}, variables: $vars}') + fi else - payload=$(jq -n --arg ref "$branch" '{target: {ref_name: $ref, ref_type: "branch"}}') + # No variables → omit the key entirely (matches Python's + # omit-when-empty contract). 
+ if [[ -n "$pattern" ]]; then + payload=$(jq -n \ + --arg ref "$branch" \ + --arg pat "$pattern" \ + '{target: {ref_name: $ref, ref_type: "branch", selector: {type: "custom", pattern: $pat}}}') + else + payload=$(jq -n --arg ref "$branch" '{target: {ref_name: $ref, ref_type: "branch"}}') + fi fi echo "Triggering pipeline on ${BB_WORKSPACE}/${repo} branch ${branch}..." @@ -395,11 +502,27 @@ cmd_pipeline_trigger() { echo " Custom pipeline: ${pattern}" fi if [[ "$variables" != "[]" ]]; then - echo " Variables: $*" + # Echo variable KEYS only — values may be secrets (API tokens, + # deploy creds) that the user passed as VAR=value. The previous + # `Variables: $*` form leaked the full value into stdout / + # terminal scrollback / CI logs / shell history. + local masked + masked=$(printf '%s\n' "$@" | sed -E 's/=.*$/=***/' | tr '\n' ' ') + echo " Variables: ${masked%% }" fi + # rc-capture pattern: capture the exit code so a 4xx (protected + # branch, custom pipeline name not found, invalid variable shape) + # surfaces as a labelled error instead of `set -e` silently + # aborting. `|| {` (not `if !`, which resets $? to 0) keeps the code. local response - response=$(bb_post "$(repo_path "$repo")/pipelines/" "$payload") + response=$(bb_post "$(repo_path "$repo")/pipelines/" "$payload") || { + local rc=$? + echo "Trigger request failed for ${BB_WORKSPACE}/${repo} branch ${branch} (exit $rc)." >&2 + echo " Common causes: protected branch, custom pipeline name not" >&2 + echo " found, or invalid variable shape." >&2 + exit $rc + } local build_num build_num=$(echo "$response" | jq -r '.build_number') @@ -417,20 +540,34 @@ cmd_pipeline_stop() { echo "Usage: bb pipeline-stop [repo] " >&2 exit 1 fi + _require_build_number "$build_number" + # Parity fix: previously scanned only 50 most-recent pipelines, so + # older builds were unfindable. Bumped to 100 (Bitbucket's max + # pagelen) which covers ~2x more in a single page without + # implementing full pagination in bash. 
The Python side paginates + # up to 2000 — this is a partial parity step. local response - response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=50") + response=$(bb_get "$(repo_path "$repo")/pipelines/?sort=-created_on&pagelen=100") local pipeline_uuid pipeline_uuid=$(echo "$response" | jq -r ".values[] | select(.build_number == ${build_number}) | .uuid" | tr -d '{}') if [[ -z "$pipeline_uuid" ]]; then - echo "Pipeline #${build_number} not found." >&2 + echo "Pipeline #${build_number} not found in the 100 most-recent pipelines." >&2 exit 1 fi - bb_post "$(repo_path "$repo")/pipelines/%7B${pipeline_uuid}%7D/stopPipeline" > /dev/null - echo "Stopped pipeline #${build_number}" + # Parity fix: previously discarded the API response with > /dev/null. + # Now capture exit code so a stop failure (already-stopped pipeline, + # 4xx, etc.) surfaces to the user instead of being masked. + if bb_post "$(repo_path "$repo")/pipelines/%7B${pipeline_uuid}%7D/stopPipeline" > /dev/null; then + echo "Stopped pipeline #${build_number}" + else + local rc=$? + echo "Stop request failed for pipeline #${build_number} (exit $rc)." >&2 + exit $rc + fi } cmd_pipeline_approve() { @@ -471,7 +608,16 @@ cmd_pr_list() { count=$(echo "$response" | jq '.size') if [[ "$count" == "0" ]]; then - echo " No ${state,,} pull requests." + # Use `tr` instead of `${state,,}` for bash 3.x compatibility. + # macOS ships bash 3.2 at /bin/bash and `#!/usr/bin/env bash` + # finds it before Homebrew bash on the default PATH, so the + # lowercase-substitution syntax would fail this branch only + # (when count==0) — and only on macOS where it bites hardest. + # Use printf '%s' rather than echo to neutralize leading-dash + # values (echo would interpret `-n` / `-e` / `-E` as flags); not + # a real risk for Bitbucket states (OPEN/MERGED/DECLINED/...) + # but bulletproof and only marginally longer. + echo " No $(printf '%s' "$state" | tr '[:upper:]' '[:lower:]') pull requests." 
return fi @@ -571,29 +717,58 @@ cmd_pr_create() { exit 1 fi - # Read description from stdin if piped, otherwise empty + # Read description from stdin if piped, otherwise empty. local description="" if [[ ! -t 0 ]]; then description=$(cat) fi + # Parity fix: omit `description` from the payload when empty so it + # matches the Python omit-when-empty contract (bb_ops.pr_create). + # close_source_branch hardcoded to true matches bb_ops's default + # too; both surfaces could expose this as a flag in a future PR. local payload - payload=$(jq -n \ - --arg title "$title" \ - --arg desc "$description" \ - --arg src "$source_branch" \ - --arg dst "$dest" \ - '{ - title: $title, - description: $desc, - source: {branch: {name: $src}}, - destination: {branch: {name: $dst}}, - close_source_branch: true - }') + if [[ -n "$description" ]]; then + payload=$(jq -n \ + --arg title "$title" \ + --arg desc "$description" \ + --arg src "$source_branch" \ + --arg dst "$dest" \ + '{ + title: $title, + description: $desc, + source: {branch: {name: $src}}, + destination: {branch: {name: $dst}}, + close_source_branch: true + }') + else + payload=$(jq -n \ + --arg title "$title" \ + --arg src "$source_branch" \ + --arg dst "$dest" \ + '{ + title: $title, + source: {branch: {name: $src}}, + destination: {branch: {name: $dst}}, + close_source_branch: true + }') + fi echo "Creating PR: ${source_branch} -> ${dest}" + + # rc-capture pattern: a 400 (typo in dest branch, duplicate PR + # already open, source branch not pushed, etc.) makes bb_post + # exit non-zero; label it so the user doesn't blindly retry and + # create a duplicate. `|| {` (not `if !`, which resets $? to 0) + # keeps the real exit code for the message and `exit`. local response - response=$(bb_post "$(repo_path "$repo")/pullrequests" "$payload") + response=$(bb_post "$(repo_path "$repo")/pullrequests" "$payload") || { + local rc=$? + echo "PR-create request failed (exit $rc)." 
>&2 + echo " Common causes: dest branch typo, a PR with this source" >&2 + echo " branch is already open, source branch not pushed." >&2 + exit $rc + } local pr_id pr_url pr_id=$(echo "$response" | jq -r '.id') @@ -613,8 +788,37 @@ cmd_pr_approve() { exit 1 fi - bb_post "$(repo_path "$repo")/pullrequests/${pr_id}/approve" > /dev/null - echo "Approved PR #${pr_id}" + # Parity fix: capture exit code so an approve failure (already + # approved, 4xx, etc.) surfaces to the user instead of being + # masked by the unconditional success print. + if bb_post "$(repo_path "$repo")/pullrequests/${pr_id}/approve" > /dev/null; then + echo "Approved PR #${pr_id}" + else + local rc=$? + echo "Approve request failed for PR #${pr_id} (exit $rc)." >&2 + exit $rc + fi +} + +cmd_pr_unapprove() { + local repo + repo=$(detect_repo "${1:-}") + local pr_id="${2:-}" + + if [[ -z "$pr_id" ]]; then + echo "Usage: bb pr-unapprove [repo] " >&2 + exit 1 + fi + + # Bitbucket's contract: DELETE the same /approve subpath that + # POST uses for approval. Parity with bb_ops.pr_unapprove. + if bb_delete "$(repo_path "$repo")/pullrequests/${pr_id}/approve" > /dev/null; then + echo "Removed approval on PR #${pr_id}" + else + local rc=$? + echo "Unapprove request failed for PR #${pr_id} (exit $rc)." >&2 + exit $rc + fi } cmd_pr_merge() { @@ -629,14 +833,34 @@ cmd_pr_merge() { exit 1 fi + # Validate strategy against Bitbucket's accepted set so a typo + # (e.g. "squash_commit") fails locally with a clear message + # instead of getting an opaque 400 from the API. + case "$strategy" in + merge_commit|squash|fast_forward) ;; + *) + echo "Error: invalid strategy '${strategy}'." >&2 + echo " Valid: merge_commit, squash, fast_forward." 
>&2 + exit 1 + ;; + esac + local payload payload=$(jq -n --arg strategy "$strategy" \ '{type: "pullrequest", merge_strategy: $strategy, close_source_branch: true}') - local response - response=$(bb_put "$(repo_path "$repo")/pullrequests/${pr_id}/merge" "$payload") - - echo "Merged PR #${pr_id} (${strategy})" + # Parity fix: capture exit code so a merge conflict / failing + # required check / wrong-strategy-for-repo error surfaces as a + # labelled error instead of silently aborting under set -e. + if bb_put "$(repo_path "$repo")/pullrequests/${pr_id}/merge" "$payload" > /dev/null; then + echo "Merged PR #${pr_id} (${strategy})" + else + local rc=$? + echo "Merge request failed for PR #${pr_id} (exit $rc)." >&2 + echo " Common causes: unresolved comments, failing required" >&2 + echo " builds, or wrong merge strategy for this repo." >&2 + exit $rc + fi } cmd_pr_decline() { @@ -649,8 +873,14 @@ cmd_pr_decline() { exit 1 fi - bb_post "$(repo_path "$repo")/pullrequests/${pr_id}/decline" > /dev/null - echo "Declined PR #${pr_id}" + # Parity fix: capture exit code (was previously discarded). + if bb_post "$(repo_path "$repo")/pullrequests/${pr_id}/decline" > /dev/null; then + echo "Declined PR #${pr_id}" + else + local rc=$? + echo "Decline request failed for PR #${pr_id} (exit $rc)." >&2 + exit $rc + fi } cmd_pr_diff() { @@ -663,7 +893,12 @@ cmd_pr_diff() { exit 1 fi - curl -sf -u "${BB_USER}:${BB_TOKEN}" \ + # Parity fix: -L so we follow any redirect Bitbucket introduces on + # this endpoint (today it serves inline, but future-proofing). + # `curl -L -u` does NOT resend credentials to a different host by + # default, so the cross-host credential-leak concern is already + # mitigated by curl semantics. 
+ curl -sfL -u "${BB_USER}:${BB_TOKEN}" \ "${BB_API}$(repo_path "$repo")/pullrequests/${pr_id}/diff" } @@ -691,10 +926,50 @@ cmd_pr_comments() { ' } +cmd_pr_comment_add() { + local repo + repo=$(detect_repo "${1:-}") + local pr_id="${2:-}" + local body="${3:-}" + + if [[ -z "$pr_id" || -z "$body" ]]; then + echo "Usage: bb pr-comment [repo] " >&2 + echo "" >&2 + echo " Add a top-level comment to PR #." >&2 + echo " Use single quotes around if it contains spaces or shell metacharacters." >&2 + exit 1 + fi + + # Bitbucket's contract: POST {"content": {"raw": ""}}. + # Parity with bb_ops.pr_comment_add. + local payload + payload=$(jq -n --arg body "$body" '{content: {raw: $body}}') + + local response + response=$(bb_post "$(repo_path "$repo")/pullrequests/${pr_id}/comments" "$payload") + + local comment_id + comment_id=$(echo "$response" | jq -r '.id // empty') + if [[ -n "$comment_id" ]]; then + echo "Posted comment #${comment_id} on PR #${pr_id}" + else + echo "Comment posted (response did not include an id)." >&2 + echo "$response" >&2 + exit 1 + fi +} + # ========================================================================= # BRANCH COMMANDS # ========================================================================= +# URL-encode a single path segment (for branch names containing `/`). +# jq's @uri filter does this correctly; using jq avoids a Python / +# Perl dependency. +_url_encode_segment() { + printf '%s' "$1" | jq -sRr @uri +} + cmd_branches() { local repo repo=$(detect_repo "${1:-}") @@ -721,6 +996,74 @@ cmd_branches() { done } +cmd_branch_show() { + local repo + repo=$(detect_repo "${1:-}") + local name="${2:-}" + + if [[ -z "$name" ]]; then + echo "Usage: bb branch [repo] " >&2 + echo "" >&2 + echo " Show details for a single branch. Branch names with '/'" >&2 + echo " (e.g. feat/widget) are URL-encoded automatically." >&2 + exit 1 + fi + + # URL-encode the name so feat/widget isn't interpreted as a + # sub-resource path by Bitbucket. 
cmd_commits() {
    # List recent commits for a repository, optionally limited to a branch.
    #
    # Arguments:
    #   $1  repo slug, handed to detect_repo (may be empty)
    #   $2  branch name (optional; empty means all branches)
    #   $3  number of commits to request (optional; default 10)
    local repo
    repo=$(detect_repo "${1:-}")
    local branch="${2:-}"
    local count="${3:-10}"

    # count is interpolated into the query string, so reject anything that
    # is not a positive integer. Without this, a value like `abc` or
    # `10&x=y` would silently produce a malformed or altered request.
    if [[ ! "$count" =~ ^[1-9][0-9]*$ ]]; then
        echo "Usage: bb commits [repo] [branch] [count]" >&2
        echo "" >&2
        echo "  count must be a positive integer (got '${count}')." >&2
        exit 1
    fi

    local path response
    if [[ -n "$branch" ]]; then
        # URL-encode the branch so names containing `/` stay a single
        # path segment.
        local encoded
        encoded=$(_url_encode_segment "$branch")
        path="$(repo_path "$repo")/commits/${encoded}?pagelen=${count}"
        echo "Recent commits on ${BB_WORKSPACE}/${repo} branch ${branch}:"
    else
        path="$(repo_path "$repo")/commits?pagelen=${count}"
        echo "Recent commits on ${BB_WORKSPACE}/${repo} (all branches):"
    fi
    echo ""

    response=$(bb_get "$path")

    printf " %-10s %-12s %-22s %s\n" "HASH" "DATE" "AUTHOR" "MESSAGE"
    printf " %-10s %-12s %-22s %s\n" "----" "----" "------" "-------"

    # One TSV row per commit: short hash, date part of the timestamp,
    # author (display name, else raw author string), first line of the
    # commit message. Author and message are truncated for the table.
    echo "$response" | jq -r '
        .values[] |
        [
            .hash[:8],
            (.date | split("T") | .[0]),
            (.author.user.display_name // .author.raw // "unknown" | .[:20]),
            (.message // "(empty)" | split("\n") | .[0] | .[:60])
        ] | @tsv
    ' | while IFS=$'\t' read -r hash date author msg; do
        printf " %-10s %-12s %-22s %s\n" "$hash" "$date" "$author" "$msg"
    done
}
NEVER echoes BB_TOKEN and + # NEVER echoes credentials embedded in the origin URL (a common + # pattern for token-based git auth — `whoami` output gets pasted + # into bug reports / screenshots, can't have secrets in it). + echo "bb configuration:" + echo " User: ${BB_USER}" + echo " Workspace: ${BB_WORKSPACE}" + echo " API: ${BB_API}" + echo " Token: [set, redacted]" + + echo "" + echo "Git context:" + local cwd_branch cwd_remote + cwd_branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "(not a git repo)") + # Strip `user:token@` from origin URL before echoing. Matches the + # `_redact_url` helper in mcp_server.py / git_ops.py: anything of + # the form `://user:token@host/path` becomes `://[redacted]@host/path`. + # + # Use an `if` block (not `git ... | sed ... || echo`) because in a + # pipeline the `||` attaches to sed, which always exits 0 on + # empty input — so the fallback would never fire when git itself + # failed (no origin remote / not in a git repo). The if-form + # branches on git's exit status, not sed's. + local _git_remote_raw + if _git_remote_raw=$(git remote get-url origin 2>/dev/null); then + cwd_remote=$(printf '%s' "$_git_remote_raw" | sed -E 's#://[^/@]+@#://[redacted]@#') + else + cwd_remote="(no origin remote)" + fi + echo " Cwd: $(pwd)" + echo " Branch: ${cwd_branch}" + echo " Origin: ${cwd_remote}" + + # Light reachability check. Probe the configured workspace + # endpoint (not /user — workspace-scoped tokens, which Atlassian + # now recommends, reject /user with 401/403 while serving + # /repositories/{workspace} correctly). False-negative on /user + # would tell a user with a valid token to rotate it. + # + # Converse caveat: /repositories/{workspace} requires + # `repository:read` scope. A workspace-scoped token granting only + # `pipelines:read` or `pullrequest:read` will fail this probe even + # though `bb pipelines` / `bb prs` still work. Treat this as a + # scope hint, not a global credential verdict. 
+ echo "" + echo "Auth check:" + if bb_get "/repositories/${BB_WORKSPACE}?pagelen=1" > /dev/null 2>&1; then + echo " Workspace reachable — auth OK." + else + echo " Workspace NOT reachable — token may be invalid, expired," + echo " scoped to a different workspace, or missing repository:read" + echo " (pipeline/PR-only scoped tokens still work for those commands)." + echo " Rotate at https://id.atlassian.com/manage-profile/security/api-tokens" + fi +} + cmd_open() { local repo repo=$(detect_repo "${1:-}") @@ -873,20 +1274,24 @@ PIPELINES bb logs [repo] [step] Show step logs bb trigger [repo] [branch] [pattern] Trigger a pipeline run bb stop [repo] Stop a running pipeline - bb approve [repo] Open pipeline in browser (manual steps require UI) + bb approve [repo] Open pipeline in browser (manual steps require UI) PULL REQUESTS bb prs [repo] [state] List PRs (default: OPEN) bb pr [repo] View PR details bb pr-create [repo] [dest] Create PR from current branch bb pr-approve [repo] <id> Approve a PR + bb pr-unapprove [repo] <id> Remove your approval on a PR bb pr-merge [repo] <id> [strategy] Merge a PR (merge_commit|squash|fast_forward) bb pr-decline [repo] <id> Decline a PR bb pr-diff [repo] <id> Show PR diff bb pr-comments [repo] <id> Show PR comments + bb pr-comment [repo] <id> <body> Add a comment to a PR BRANCHES bb branches [repo] List branches + bb branch [repo] <name> Show a single branch + bb commits [repo] [branch] [count] List recent commits (default count: 10) REPOSITORY bb repos List workspace repos @@ -895,21 +1300,25 @@ REPOSITORY bb vars [repo] List pipeline variables UTILITIES + bb whoami Show resolved config + git context bb open [repo] [section] Open in browser (pr|pipelines|branches|settings|commits) bb help Show this help GLOBAL FLAGS - -w, --workspace <name> Override workspace (default: dreamfacesbir) + -w, --workspace <name> Override BB_WORKSPACE for this invocation - Example: bb -w other-workspace repos + Example: bb -w acme repos NOTES - [repo] is 
auto-detected from git remote if omitted. - Config: ~/.config/bb/config or env vars BB_USER, BB_TOKEN, BB_WORKSPACE + [repo] is auto-detected from the current git remote if omitted. + Config: ~/.config/bb/config or env vars BB_USER, BB_TOKEN, BB_WORKSPACE. Auth uses Atlassian API tokens with HTTP Basic auth. BB_USER is your Bitbucket account email address. Create a token at: https://id.atlassian.com/manage-profile/security/api-tokens + + For agent-driven workflows, register the MCP server (mcp_server.py) + with Claude Code. See README.md for the install path. HELP } @@ -968,18 +1377,23 @@ case "$command" in pr|pr-view) cmd_pr_view "$@" ;; pr-create|prc) cmd_pr_create "$@" ;; pr-approve|pra) cmd_pr_approve "$@" ;; + pr-unapprove|prua) cmd_pr_unapprove "$@" ;; pr-merge|prm) cmd_pr_merge "$@" ;; pr-decline|prd) cmd_pr_decline "$@" ;; pr-diff) cmd_pr_diff "$@" ;; pr-comments|pr-comm) cmd_pr_comments "$@" ;; + pr-comment) cmd_pr_comment_add "$@" ;; # Branches branches|br) cmd_branches "$@" ;; + branch) cmd_branch_show "$@" ;; + commits) cmd_commits "$@" ;; # Repos repos) cmd_repos "$@" ;; repo) cmd_repo "$@" ;; downloads|dl) cmd_downloads "$@" ;; vars) cmd_vars "$@" ;; # Utilities + whoami) cmd_whoami ;; open|o) cmd_open "$@" ;; help|--help|-h) cmd_help ;; *) diff --git a/bb_api.py b/bb_api.py new file mode 100644 index 0000000..0bac846 --- /dev/null +++ b/bb_api.py @@ -0,0 +1,661 @@ +""" +bb_api — Bitbucket Cloud REST API client for the bb MCP server. + +This module is the Python *parallel* of the helpers at the top of the `bb` +bash script (load_config, bb_get/post/put/delete, detect_repo, repo_path). +The MCP server uses this module to talk to Bitbucket directly; it does NOT +shell out to `bb` and parse its output. 
+ +Two pieces sit side by side: + + bb (bash) <--> Bitbucket REST API <--> bb_api (Python) + (single source + of truth) + +When a test in test_bb_api.py finds a defect in URL construction, body +shape, or auth handling, the fix lands in both bb_api.py AND `bb` if the +bash side has parallel logic. See CONTRIBUTING.md for the parity rule. + +Stdlib-only on purpose: keeps the MCP server's bootstrap fast and minimises +the supply-chain surface. urllib.request is the transport. +""" + +from __future__ import annotations + +import base64 +import json +import os +import re +import subprocess +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterator + +DEFAULT_API_BASE = "https://api.bitbucket.org/2.0" + +# Pagination caps. Bitbucket's `next` cursor walking can loop if the server +# returns a malformed page; defend against that and against runaway costs by +# refusing to walk more pages than this. 200 pages * default pagelen (typically +# 10-50) is a generous ceiling for any realistic repo. +MAX_PAGINATION_ITERATIONS = 200 + + +class BBConfigError(RuntimeError): + """Raised when required configuration is missing or unresolvable.""" + + +class BBApiError(RuntimeError): + """Raised when an API call cannot be completed. + + The common case is a non-2xx HTTP response — `status` carries the HTTP + code, `body` carries the response body (truncated in the message). The + same exception is also used as a generic transport-error wrapper for + network failures (DNS, TLS, timeout) and for malformed-response errors + surfaced by `BBClient.paginate`; in those cases `status` is `0` and + `body` is a diagnostic string. Callers branching on HTTP semantics + should check `status > 0` before dispatching by code. 
def _read_keyvalue_file(path: Path) -> dict[str, str]:
    """Parse a shell-style ``KEY=value`` file into a dict.

    Used for both ~/.config/bb/config and an optional .env file. Only a
    small subset of what bash ``source`` accepts is handled: there is no
    variable substitution and no inline-comment stripping.

    Parsing rules:
      * blank lines and lines starting with ``#`` are skipped;
      * a leading ``export `` is tolerated and dropped;
      * lines without ``=`` are skipped;
      * the line is split on the FIRST ``=``, so values may contain ``=``;
      * key and value are whitespace-stripped, and one layer of matching
        single or double quotes around the value is removed.

    Args:
        path: File to read. A path that is not an existing regular file
            yields an empty dict rather than an error.

    Returns:
        Mapping of key to value; a later duplicate key overwrites an
        earlier one.
    """
    out: dict[str, str] = {}
    if not path.is_file():
        return out
    # Decode as UTF-8 explicitly. The default is the locale encoding, which
    # would make the same config file parse differently (or raise) on a
    # machine with a non-UTF-8 locale.
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        # Tolerate `export KEY=value` since bash users sometimes write that.
        line = line.removeprefix("export ").lstrip()
        if "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        value = value.strip()
        # Strip a single layer of matching quotes if present.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            value = value[1:-1]
        out[key] = value
    return out
.env is a repo-local development override; ~/.config/bb is + user-global. + + Membership test (rather than `or` coalesce) so an explicitly-set empty + env var doesn't silently fall through to the file. The required-keys + check below still catches empty values as missing — that part matches + bash's `[[ -z "$BB_USER" ]]` check. + + Raises BBConfigError if any required key is missing or empty. + """ + env = env if env is not None else dict(os.environ) + if config_path is None: + config_path = Path.home() / ".config" / "bb" / "config" + # dotenv_path is intentionally optional; callers running outside the + # script directory don't get one by default. + + # Build the file_config in the SAME order bash sources its files so + # .env wins over the home config. Later .update() overwrites. + file_config: dict[str, str] = {} + file_config.update(_read_keyvalue_file(config_path)) + if dotenv_path is not None: + file_config.update(_read_keyvalue_file(dotenv_path)) + + def resolve(key: str) -> str | None: + if key in env: + return env[key] + return file_config.get(key) + + user = resolve("BB_USER") + token = resolve("BB_TOKEN") + workspace = resolve("BB_WORKSPACE") + api_base_raw = resolve("BB_API_BASE") + api_base = api_base_raw if api_base_raw else DEFAULT_API_BASE + # Normalise trailing slash so api_base + "/path" never produces "//path". + api_base = api_base.rstrip("/") + + missing = [k for k, v in [("BB_USER", user), ("BB_TOKEN", token), ("BB_WORKSPACE", workspace)] if not v] + if missing: + raise BBConfigError( + f"Missing required configuration: {', '.join(missing)}. " + "Set as environment variables or in ~/.config/bb/config." + ) + + # Explicit narrow (not `assert ... is not None`, which `python -O` strips). 
# Matches the trailing `workspace/repo(.git)?` of a Bitbucket remote URL.
# Handles both shapes the bash script supports:
#   https://bitbucket.org/acme/widget-service.git
#   git@bitbucket.org:acme/widget-service.git
# Group 1 is the workspace; group 2 is the repo slug.
_REMOTE_TAIL = re.compile(r"[:/]([^/:]+)/([^/]+?)(?:\.git)?/?$")


def parse_remote_url(url: str) -> tuple[str, str] | None:
    """Extract ``(workspace, repo_slug)`` from a git remote URL.

    The URL is whitespace-stripped, then matched against the trailing
    ``workspace/repo`` pair; an optional ``.git`` suffix and one trailing
    slash are ignored.

    Returns None when no such pair is present. That includes URLs whose
    path has only ONE segment (e.g. ``https://bitbucket.org/widget.git``):
    there the regex would otherwise capture the host (or ``token@host``)
    as the "workspace", which is wrong and can drag a credential embedded
    in the URL into the result.

    Caller contract: this function does NOT anchor to any specific host,
    so a URL from any git host with a ``workspace/repo`` tail parses
    successfully. Callers handling URLs from untrusted input must verify
    the host themselves before using the result.

    Args:
        url: Remote URL in ``scheme://host/ws/repo`` or scp-like
            ``user@host:ws/repo`` form.

    Returns:
        ``(workspace, repo_slug)``, or None if the URL is not recognised.
    """
    text = url.strip()
    match = _REMOTE_TAIL.search(text)
    if match is None:
        return None
    workspace, repo = match.group(1), match.group(2)
    # The separator just before group 1 is the second slash of `//`:
    # group 1 is the URL authority (host), not a workspace.
    if text[: match.start() + 1].endswith("//"):
        return None
    # `user:token@host/repo` matches from the `:` inside the userinfo, which
    # makes group 1 `token@host`. An `@` there means the capture is part of
    # the authority, so refuse it.
    if "@" in workspace:
        return None
    return workspace, repo
def _validate_query_value(key: str, value: Any) -> None:
    """Raise TypeError unless `value` is safe to hand to urlencode.

    Accepted shapes are a scalar (str, int, float, bool) or a list/tuple
    whose elements are all scalars. Anything else is refused: with
    `doseq=True`, urlencode would otherwise encode a dict as its keys, or
    stringify an arbitrary object, and produce a URL the caller never
    wrote.

    Args:
        key: Query parameter name; used only in the error message.
        value: Candidate query parameter value.

    Raises:
        TypeError: if `value`, or any element of a list/tuple `value`, is
            not a scalar.
    """
    scalar_types = (str, int, float, bool)
    if isinstance(value, scalar_types):
        return
    if not isinstance(value, (list, tuple)):
        raise TypeError(
            f"query[{key!r}]: must be scalar or list/tuple of scalars, "
            f"got {type(value).__name__}"
        )
    # Sequence case: the first non-scalar element is the one reported.
    for element in value:
        if isinstance(element, scalar_types):
            continue
        raise TypeError(
            f"query[{key!r}]: list/tuple elements must be scalars "
            f"(str/int/float/bool), got {type(element).__name__}"
        )
+ creds = f"{config.user}:{config.token}".encode() + self._auth_header = "Basic " + base64.b64encode(creds).decode() + + # -- Internal request builder -- + + def _request( + self, + method: str, + path: str, + *, + json_body: Any = None, + query: dict[str, Any] | None = None, + timeout: float | None = None, + ) -> Any: + url = self.config.api_base + path + if query: + # Drop None values so callers can pass `branch=None` to mean + # "skip this query parameter." Validate the rest so a nested + # dict/object doesn't silently become a meaningless URL. + cleaned = {k: v for k, v in query.items() if v is not None} + for k, v in cleaned.items(): + _validate_query_value(k, v) + if cleaned: + url = url + "?" + urllib.parse.urlencode(cleaned, doseq=True) + + headers = { + "Authorization": self._auth_header, + "Accept": "application/json", + "User-Agent": "bb-mcp/1.0 (+https://github.com/daniel-pittman/bitbucket-cli)", + } + data: bytes | None = None + if json_body is not None: + data = json.dumps(json_body).encode("utf-8") + headers["Content-Type"] = "application/json" + + req = urllib.request.Request(url, data=data, method=method, headers=headers) + effective_timeout = timeout if timeout is not None else self._default_timeout + try: + with self._opener.open(req, timeout=effective_timeout) as resp: + body = resp.read() + # 204 No Content is a valid empty response on DELETE / some + # mutation endpoints; return None so callers can branch on it. + if not body: + return None + # The Bitbucket API returns JSON for every non-empty success + # response we care about. If a future endpoint returns + # something else (e.g. raw log text), callers should switch + # to a lower-level fetch path; we don't try to guess here. 
+ return json.loads(body.decode("utf-8")) + except urllib.error.HTTPError as e: + body_text = "" + try: + body_text = e.read().decode("utf-8", errors="replace") + except Exception: # noqa: BLE001 - HTTPError.read can raise anything + pass + raise BBApiError(e.code, url, body_text) from e + except urllib.error.URLError as e: + # DNS failure, connection refused, TLS error, socket timeout, etc. + # HTTPError is a subclass of URLError, so the order above matters + # (HTTPError caught first). Wrap with status=0 to preserve the + # documented BBApiError contract (see class docstring). + raise BBApiError(0, url, f"network error: {e.reason}") from e + + # -- Public methods -- + + def get( + self, + path: str, + *, + query: dict[str, Any] | None = None, + timeout: float | None = None, + ) -> Any: + return self._request("GET", path, query=query, timeout=timeout) + + def post( + self, + path: str, + *, + json_body: Any = None, + timeout: float | None = None, + ) -> Any: + return self._request("POST", path, json_body=json_body, timeout=timeout) + + def put( + self, + path: str, + *, + json_body: Any = None, + timeout: float | None = None, + ) -> Any: + return self._request("PUT", path, json_body=json_body, timeout=timeout) + + def delete(self, path: str, *, timeout: float | None = None) -> Any: + return self._request("DELETE", path, timeout=timeout) + + def fetch_redirected_text( + self, + path: str, + *, + max_redirects: int = 5, + timeout: float | None = None, + ) -> str: + """Fetch raw text from an endpoint that may return a 3xx redirect + to an external host (e.g. pipeline-log download → S3 signed URL). + + Why this exists alongside the regular `get` path: + + * The default opener refuses 3xx (see _NoRedirectHandler). The + log endpoint at /pipelines/{uuid}/steps/{uuid}/log returns + either an inline log body (200) OR a 307 to a signed S3 URL. + The 307 case has to be followed to retrieve the log. + + * We must NEVER send the Bitbucket `Authorization` header to S3. 
+ The signed URL has its own auth via the `Signature` query + parameter; S3 will reject the request if Basic auth is also + present, and even when it didn't, sending the credential to + an arbitrary host is a credential-leak. + + * The log body is plain text, not JSON. The regular `get` path + assumes JSON. + + Implementation: open with the default opener (refuses redirects); + on a 3xx HTTPError, extract `Location`, build a fresh Request + WITHOUT Authorization, and follow up to `max_redirects` hops. + Cross-host hops are always allowed (the whole point of this + method is to follow Bitbucket -> S3); same-host hops keep the + auth header in case Bitbucket itself ever redirects internally. + + Symmetry with bash: `bb logs` uses `curl -sfL -u user:tok`, and + curl's `--location` does NOT resend `-u` credentials on a + cross-host redirect (only `--location-trusted` would, or a + custom `-H Authorization` header). So the bash side is also + safe today. We don't rely on that behaviour — the MCP server + may later need header-based auth where the curl analogue would + leak, and the explicit Python check is the symmetric, future- + proof version. + + Returns the body of the final 200 response as a decoded string. + Raises BBApiError on too-many-redirects, missing Location header, + non-3xx HTTP errors, or transport failures. + """ + url = self.config.api_base + path + # The first request carries Bitbucket auth. We rebuild headers on + # each hop so a cross-host redirect can drop the credential. + # Hostname compared case-insensitively per RFC 3986 §3.2.2 so + # `API.bitbucket.org` doesn't trigger a needless auth-strip on a + # capitalisation-only difference. 
+ bitbucket_host = urllib.parse.urlparse(self.config.api_base).netloc.lower() + send_auth = True + + for hop in range(max_redirects + 1): + headers = { + "Accept": "*/*", + "User-Agent": "bb-mcp/1.0 (+https://github.com/daniel-pittman/bitbucket-cli)", + } + if send_auth: + headers["Authorization"] = self._auth_header + + req = urllib.request.Request(url, method="GET", headers=headers) + effective_timeout = timeout if timeout is not None else self._default_timeout + try: + with self._opener.open(req, timeout=effective_timeout) as resp: + body = resp.read() + return body.decode("utf-8", errors="replace") + except urllib.error.HTTPError as e: + try: + if e.code not in (301, 302, 303, 307, 308): + body_text = "" + try: + body_text = e.read().decode("utf-8", errors="replace") + except Exception: # noqa: BLE001 + pass + raise BBApiError(e.code, url, body_text) from e + # 3xx: extract Location, follow it. urllib's HTTPError + # exposes the response headers via e.headers. + location = e.headers.get("Location") if e.headers else None + if not location: + raise BBApiError( + e.code, url, "redirect response missing Location header" + ) from e + new_url = urllib.parse.urljoin(url, location) + new_host = urllib.parse.urlparse(new_url).netloc.lower() + # Strip auth on any cross-host hop. Once stripped, keep it + # stripped for all subsequent hops in this chain. + if new_host != bitbucket_host: + send_auth = False + url = new_url + finally: + # Explicit close so the underlying socket is released + # immediately on every HTTPError path (both the 3xx + # redirect-extract branch and the non-3xx error-with- + # body branch). GC would do this eventually but + # explicit-is-better under load. 
+ try: + e.close() + except Exception: # noqa: BLE001 + pass + except urllib.error.URLError as e: + raise BBApiError(0, url, f"network error: {e.reason}") from e + + raise BBApiError( + 0, url, f"redirect chain exceeded {max_redirects} hops" + ) + + def paginate( + self, + path: str, + *, + query: dict[str, Any] | None = None, + max_iterations: int = MAX_PAGINATION_ITERATIONS, + ) -> Iterator[Any]: + """Walk a Bitbucket paginated endpoint, yielding each item in `values`. + + Bitbucket Cloud's pagination shape: + { "values": [...], "next": "https://api.bitbucket.org/2.0/...?page=2", ... } + + Defends against two failure modes: + * Stuck cursor: if `next` doesn't change between iterations, stop. + * Runaway: if we walk more than `max_iterations` pages, raise. + + The first page uses `path` + `query`; subsequent pages use the full + URL from `next`, which already includes the relevant query string. + """ + url: str | None = None + last_next: str | None = None + # Acceptable separators after api_base in a continuation URL. A bare + # `startswith(api_base)` is a *string* match that lets + # `https://api.bitbucket.org/2.0evil.example.com/...` slip past, so + # we require the next character to be a path or query separator. + base_with_path = self.config.api_base + "/" + base_with_query = self.config.api_base + "?" + + for iteration in range(max_iterations): + if url is None: + # First page uses caller's `path` + `query` (already in + # closure as the `query` parameter — no second variable + # needed since subsequent pages route through the `next` + # URL which carries any continuation params itself). + payload = self._request("GET", path, query=query) + else: + # Strip the api_base off `next` so _request can re-add it; + # this keeps every request going through the same code path + # and means tests don't need to special-case page-2 URLs. 
+ if url.startswith(base_with_path) or url.startswith(base_with_query): + rel = url[len(self.config.api_base) :] + else: + # Bitbucket's `next` should always start with our base + # followed by `/` or `?`. Anything else (different host, + # or the prefix-trick where api_base is followed by + # arbitrary characters) is refused rather than followed. + raise BBApiError( + 0, + url, + f"pagination cursor host mismatch (expected {self.config.api_base})", + ) + payload = self._request("GET", rel) + + if not isinstance(payload, dict): + raise BBApiError( + 0, + url or (self.config.api_base + path), + f"expected dict from paginated endpoint, got {type(payload).__name__}", + ) + + if "values" not in payload: + raise BBApiError( + 0, + url or (self.config.api_base + path), + "paginated response missing 'values' key", + ) + + for item in payload["values"]: + yield item + + next_url = payload.get("next") + if not next_url: + return + if not isinstance(next_url, str): + raise BBApiError( + 0, + url or (self.config.api_base + path), + f"paginated response 'next' must be a string, got {type(next_url).__name__}", + ) + if next_url == last_next: + # Stuck cursor — server returned the same `next` again. + return + last_next = next_url + url = next_url + + raise BBApiError( + 0, + url or (self.config.api_base + path), + f"pagination exceeded {max_iterations} pages without terminating", + ) diff --git a/bb_ops.py b/bb_ops.py new file mode 100644 index 0000000..e09400f --- /dev/null +++ b/bb_ops.py @@ -0,0 +1,913 @@ +""" +bb_ops — Bitbucket REST operations grouped by resource. + +The MCP server (mcp_server.py, future PR) wires each function here to a +tool. Each function takes a BBClient as its first positional argument and +returns native Python data (dicts, lists, strings) — no terminal-style +formatting, no colour codes, no parsing of bash output. + +`bb` (bash) and bb_ops (Python) are parallel implementations of the same +Bitbucket REST contract. 
def _is_positive_int(value: Any) -> bool:
    """Report whether `value` is a genuine int that is at least 1.

    Booleans are rejected up front: `bool` subclasses `int`, so without
    that guard `True` would be accepted as `1` and could then end up in a
    URL or query string as the literal text "True".
    """
    if isinstance(value, bool):
        return False
    return isinstance(value, int) and value >= 1
+_PIPELINE_SCAN_LIMIT = 2000 + + +class BBOpNotFound(LookupError): + """A requested resource (pipeline build_number, step index, etc.) was + not present in the responses we walked. Distinct from BBApiError so + callers can render "no such pipeline" vs "API failure" differently.""" + + +def _wrap_uuid(uuid: str) -> str: + """Bitbucket's URL contract uses `{uuid}` with the literal braces, + URL-encoded as `%7B...%7D`. The bash script does this by interpolating + `%7B${uuid}%7D` directly into curl URLs; mirror that. The UUID itself + is alphanumeric+hyphens so `quote` would no-op on it, but we route + through it to defend against a UUID that ever contains characters + that would otherwise need encoding.""" + inner = uuid.strip() + if inner.startswith("{") and inner.endswith("}"): + inner = inner[1:-1] + return f"%7B{quote(inner)}%7D" + + +def _pipelines_root(workspace: str, repo: str) -> str: + """Common URL prefix for the pipelines API. Centralising this means a + future API-version bump touches one place.""" + return f"{repo_path(workspace, repo)}/pipelines/" + + +def _strip_uuid_braces(uuid: str | None) -> str: + """Bitbucket returns pipeline UUIDs in two shapes depending on endpoint: + bare ('a1b2-...') and brace-wrapped ('{a1b2-...}'). Normalise to bare + so callers don't have to care.""" + if not uuid: + raise BBApiError(0, "", "response missing uuid") + s = uuid.strip() + if s.startswith("{") and s.endswith("}"): + return s[1:-1] + return s + + +# --------------------------------------------------------------------------- +# Resolution helpers (build_number -> uuid, step_index -> uuid) +# --------------------------------------------------------------------------- + + +def _resolve_pipeline_uuid( + client: BBClient, + workspace: str, + repo: str, + build_number: int, + *, + scan_limit: int = _PIPELINE_SCAN_LIMIT, +) -> str: + """Resolve a pipeline's UUID by walking pipelines/ sorted by most-recent. 
+ + Bitbucket Cloud's API does not expose a direct `GET /pipelines/{build_number}` + endpoint, only `GET /pipelines/{uuid}`. The CLI passes a build_number + because that's what's user-visible (and what the API echoes in payloads). + This helper paginates the listing until it finds the matching build or + `scan_limit` items have been examined. + + Raises BBOpNotFound if the build_number isn't found within the scan + window. Distinct from a network/API failure so callers can render + "no such pipeline #N" naturally. + """ + if not _is_positive_int(build_number): + raise ValueError(f"build_number must be a positive int, got {build_number!r}") + + seen = 0 + path = _pipelines_root(workspace, repo) + query = {"sort": "-created_on", "pagelen": _BITBUCKET_MAX_PAGELEN} + for pipeline in client.paginate(path, query=query): + seen += 1 + if pipeline.get("build_number") == build_number: + return _strip_uuid_braces(pipeline.get("uuid")) + if seen >= scan_limit: + break + raise BBOpNotFound( + f"pipeline #{build_number} not found within the {scan_limit} most-recent " + f"pipelines of {workspace}/{repo}" + ) + + +def _resolve_step_uuid( + client: BBClient, + workspace: str, + repo: str, + pipeline_uuid: str, + step_index: int, +) -> str: + """Return the step UUID for the step at the given 0-based index. + + The bash script's `bb logs` uses the same 0-based indexing into the + steps list; mirror that contract so the user-facing index numbers + match across both surfaces. + + Returns just the uuid (not the name) — callers that need the name + should fetch the steps list themselves via `pipeline_steps()`. The + MCP step-logs tool wraps the log payload in its own response shape + and can surface the name there. 
+ """ + if ( + not isinstance(step_index, int) + or isinstance(step_index, bool) + or step_index < 0 + ): + raise ValueError(f"step_index must be a non-negative int, got {step_index!r}") + + steps = _pipeline_steps_by_uuid(client, workspace, repo, pipeline_uuid) + if step_index >= len(steps): + raise BBOpNotFound( + f"step index {step_index} out of range " + f"(pipeline has {len(steps)} step{'s' if len(steps) != 1 else ''})" + ) + return _strip_uuid_braces(steps[step_index].get("uuid")) + + +# --------------------------------------------------------------------------- +# Public operations +# --------------------------------------------------------------------------- + + +def pipelines_list( + client: BBClient, + workspace: str, + repo: str, + *, + count: int = 10, + sort: str = "-created_on", + branch: str | None = None, +) -> list[dict[str, Any]]: + """List recent pipelines, most-recent first by default. + + `count` is the upper bound on returned items. We always honour it + even if it exceeds Bitbucket's per-page cap (100): the function + paginates as needed. + + `branch` filters to pipelines triggered against a specific branch via + Bitbucket's `target.ref_name` query (the API supports this without a + `?q=` filter shape). 
def pipeline_trigger(
    client: BBClient,
    workspace: str,
    repo: str,
    *,
    branch: str,
    pattern: str | None = None,
    variables: dict[str, str] | Iterable[tuple[str, str]] | None = None,
) -> dict[str, Any]:
    """Trigger a new pipeline run.

    Without `pattern`, runs the branch's default pipeline. With
    `pattern`, runs the named custom pipeline (which must be defined in
    bitbucket-pipelines.yml under `custom:`).

    Args:
        client: Client used to issue the POST.
        workspace: Workspace slug.
        repo: Repository slug.
        branch: Branch to run against; must be a non-empty,
            non-whitespace string.
        pattern: Optional custom-pipeline name; non-empty string when
            given.
        variables: Pipeline variables, either a `{name: value}` dict or
            an iterable of `(name, value)` pairs. Names must be
            non-empty strings and values must be strings.

    Returns:
        The new pipeline's record as returned by the API.

    Raises:
        ValueError: any argument fails the boundary checks above. This
            includes a bare string for `variables`, string items inside
            it, and items that are not two-element pairs.
    """
    if not isinstance(branch, str) or not branch.strip():
        raise ValueError(f"branch is required and must be a string, got {branch!r}")

    target: dict[str, Any] = {"ref_name": branch, "ref_type": "branch"}
    if pattern is not None:
        if not isinstance(pattern, str) or not pattern:
            raise ValueError(f"pattern must be a non-empty string, got {pattern!r}")
        target["selector"] = {"type": "custom", "pattern": pattern}

    payload: dict[str, Any] = {"target": target}

    if variables is not None:
        # A bare string is iterable, but per character; reject it with
        # a message that says what shape is actually expected.
        if isinstance(variables, (str, bytes)):
            raise ValueError(
                "variables must be a dict or an iterable of (name, value) "
                f"pairs, not a bare string. Got {variables!r}"
            )
        if isinstance(variables, dict):
            items = list(variables.items())
        else:
            try:
                items = list(variables)
            except TypeError:
                raise ValueError(
                    "variables must be a dict or an iterable of (name, value) "
                    f"pairs, got {variables!r}"
                ) from None

        # Normalise to the list of {"key": k, "value": v} dicts the
        # request body carries.
        normalised: list[dict[str, str]] = []
        for item in items:
            # A two-character string such as "ab" would otherwise
            # unpack into key "a" and value "b" without complaint.
            if isinstance(item, (str, bytes)):
                raise ValueError(
                    f"each variable must be a (name, value) pair, got {item!r}"
                )
            try:
                k, v = item
            except (TypeError, ValueError):
                raise ValueError(
                    f"each variable must be a (name, value) pair, got {item!r}"
                ) from None
            if not isinstance(k, str) or not k:
                raise ValueError(f"variable key must be a non-empty string, got {k!r}")
            if not isinstance(v, str):
                raise ValueError(
                    f"variable value for {k!r} must be a string, got {type(v).__name__}"
                )
            normalised.append({"key": k, "value": v})
        if normalised:
            payload["variables"] = normalised

    return client.post(_pipelines_root(workspace, repo), json_body=payload)
def pipeline_logs(
    client: BBClient,
    workspace: str,
    repo: str,
    build_number: int,
    step_index: int,
    *,
    timeout: float = 120.0,
) -> str:
    """Return the raw log text of one pipeline step.

    `build_number` identifies the pipeline and `step_index` is the
    0-based position of the step within it. Both are resolved to UUIDs
    first, then the step's `/log` path is fetched through
    `client.fetch_redirected_text`.

    As the file's notes describe it, Bitbucket may answer with the log
    inline or with a redirect to a signed storage URL, and the fetch
    helper is what follows the redirect without forwarding the
    Bitbucket credential across hosts. The 120s default timeout allows
    for large log payloads.
    """
    resolved_pipeline = _resolve_pipeline_uuid(client, workspace, repo, build_number)
    resolved_step = _resolve_step_uuid(
        client, workspace, repo, resolved_pipeline, step_index
    )
    # Assemble <pipelines root><pipeline>/steps/<step>/log piece by piece.
    log_path = "".join(
        (
            _pipelines_root(workspace, repo),
            _wrap_uuid(resolved_pipeline),
            "/steps/",
            _wrap_uuid(resolved_step),
            "/log",
        )
    )
    return client.fetch_redirected_text(log_path, timeout=timeout)
def _validate_pr_id(pr_id: int) -> None:
    """Raise ValueError unless `pr_id` is a positive integer.

    Checking here means a malformed ID produces a clear local error
    before any request is sent. Bools are refused as well: they are
    ints as far as `isinstance` is concerned, yet they would render as
    `"True"` / `"False"` rather than `"1"` / `"0"` inside a URL.
    """
    if _is_positive_int(pr_id):
        return
    raise ValueError(f"pr_id must be a positive int, got {pr_id!r}")
def pr_create(
    client: BBClient,
    workspace: str,
    repo: str,
    *,
    title: str,
    source_branch: str,
    destination_branch: str = "main",
    description: str = "",
    close_source_branch: bool = True,
    reviewers: Iterable[str] | None = None,
) -> dict[str, Any]:
    """Create a pull request.

    Args:
        client: Client used to issue the POST.
        workspace: Workspace slug.
        repo: Repository slug.
        title: PR title; non-empty and not whitespace-only.
        source_branch: Branch to merge from; same constraint.
        destination_branch: Branch to merge into; same constraint.
        description: Free text. Omitted from the payload when empty or
            whitespace-only.
        close_source_branch: Whether the source branch is closed on
            merge. Defaults to True to match the bash script, which
            hardcodes that flag.
        reviewers: Iterable of Bitbucket account UUIDs, sent as
            `[{"uuid": "..."}, ...]`. A bare string is rejected because
            it would otherwise be iterated character by character.

    Returns:
        The API's response to the create request.

    Raises:
        ValueError: any argument fails the boundary checks above.
    """
    for label, value in (
        ("title", title),
        ("source_branch", source_branch),
        ("destination_branch", destination_branch),
    ):
        # Strip-check rather than truthiness so " " / "\n\t" are caught.
        if not isinstance(value, str) or not value.strip():
            raise ValueError(
                f"{label} must be a non-empty, non-whitespace string, got {value!r}"
            )
    if not isinstance(description, str):
        raise ValueError(
            f"description must be a string, got {type(description).__name__}"
        )
    if not isinstance(close_source_branch, bool):
        raise ValueError(
            f"close_source_branch must be a bool, got {type(close_source_branch).__name__}"
        )

    payload: dict[str, Any] = {
        "title": title,
        "source": {"branch": {"name": source_branch}},
        "destination": {"branch": {"name": destination_branch}},
        "close_source_branch": close_source_branch,
    }
    # The description is only sent when it carries visible content.
    if description.strip():
        payload["description"] = description

    if reviewers is not None:
        # A bare string is technically an Iterable[str] (it yields
        # characters) and would become one "reviewer" per character.
        if isinstance(reviewers, str):
            raise ValueError(
                f"reviewers must be a list/tuple of uuids, not a bare string. "
                f"Got {reviewers!r}; did you mean [{reviewers!r}]?"
            )
        # Materialise up front so a non-iterable argument surfaces as
        # ValueError like every other boundary failure, not TypeError.
        try:
            candidates = list(reviewers)
        except TypeError:
            raise ValueError(
                f"reviewers must be an iterable of uuid strings, got {reviewers!r}"
            ) from None
        normalised: list[dict[str, str]] = []
        for uuid in candidates:
            # Same strip-check as the string arguments above, so a
            # whitespace-only uuid is refused locally.
            if not isinstance(uuid, str) or not uuid.strip():
                raise ValueError(
                    f"reviewer uuids must be non-empty strings, got {uuid!r}"
                )
            normalised.append({"uuid": uuid})
        if normalised:
            payload["reviewers"] = normalised

    return client.post(_prs_root(workspace, repo), json_body=payload)
def pr_decline(
    client: BBClient, workspace: str, repo: str, pr_id: int
) -> Any:
    """Decline a pull request, i.e. close it without merging.

    Validates `pr_id`, then POSTs to the PR's `/decline` sub-path and
    returns whatever the client returns for that call.
    """
    _validate_pr_id(pr_id)
    decline_path = f"{_prs_root(workspace, repo)}/{pr_id}/decline"
    return client.post(decline_path)
def pr_comments_list(
    client: BBClient,
    workspace: str,
    repo: str,
    pr_id: int,
    *,
    count: int = 100,
) -> list[dict[str, Any]]:
    """Return up to `count` comments from a pull request.

    The requested page size is `count` clamped to the per-page cap, and
    iteration stops as soon as `count` comments are in hand so no
    further page is requested than needed.

    Raises:
        ValueError: `pr_id` or `count` is not a positive int.
    """
    _validate_pr_id(pr_id)
    if not _is_positive_int(count):
        raise ValueError(f"count must be a positive int, got {count!r}")

    page_query = {"pagelen": min(count, _BITBUCKET_MAX_PAGELEN)}
    endpoint = f"{_prs_root(workspace, repo)}/{pr_id}/comments"

    collected: list[dict[str, Any]] = []
    for comment in client.paginate(endpoint, query=page_query):
        collected.append(comment)
        if len(collected) >= count:
            break
    return collected
def repos_list(
    client: BBClient,
    workspace: str | None = None,
    *,
    count: int = 100,
    sort: str = "-updated_on",
    query: str | None = None,
) -> list[dict[str, Any]]:
    """Return up to `count` repositories from a workspace.

    When `workspace` is None the client's configured workspace
    (`client.config.workspace`) is used. `query`, when given, is passed
    through as the `q` parameter (a BBQL filter such as
    `'name ~ "widget"'`).

    This op builds its URL directly instead of going through
    `repo_path`, so the workspace checks are repeated here: empty or
    whitespace-only values, values containing `/`, and `.` / `..` are
    all refused. Without them a value like `"acme/widget"` would
    address a single-repo endpoint rather than the workspace listing.

    Raises:
        ValueError: the workspace, `count`, or `query` fails validation.
    """
    target_ws = client.config.workspace if workspace is None else workspace

    if not (isinstance(target_ws, str) and target_ws.strip()):
        raise ValueError(f"workspace must be a non-empty string, got {target_ws!r}")
    if "/" in target_ws:
        raise ValueError(f"workspace must not contain '/', got {target_ws!r}")
    if target_ws in (".", ".."):
        raise ValueError(f"workspace must not be '.' or '..', got {target_ws!r}")
    if not _is_positive_int(count):
        raise ValueError(f"count must be a positive int, got {count!r}")

    params: dict[str, Any] = {
        "sort": sort,
        "pagelen": min(count, _BITBUCKET_MAX_PAGELEN),
    }
    if query is not None:
        if not (isinstance(query, str) and query.strip()):
            raise ValueError(
                f"query must be a non-empty, non-whitespace string when provided, "
                f"got {query!r}"
            )
        params["q"] = query

    repos: list[dict[str, Any]] = []
    for entry in client.paginate(f"/repositories/{target_ws}", query=params):
        repos.append(entry)
        # Stop as soon as the cap is met so no extra page is fetched.
        if len(repos) >= count:
            break
    return repos
def vars_list(
    client: BBClient,
    workspace: str,
    repo: str,
    *,
    count: int = 100,
) -> list[dict[str, Any]]:
    """Return up to `count` pipeline configuration variables.

    The entries are handed back as the raw dicts from the API with no
    display-layer masking applied here. Per the notes accompanying this
    module, each entry carries a `secured` flag and secured entries
    come back without their value, so a missing value should not be
    read as "unset".

    Raises:
        ValueError: `count` is not a positive int.
    """
    if not _is_positive_int(count):
        raise ValueError(f"count must be a positive int, got {count!r}")

    endpoint = f"{repo_path(workspace, repo)}/pipelines_config/variables/"
    page_size = min(count, _BITBUCKET_MAX_PAGELEN)

    variables: list[dict[str, Any]] = []
    for entry in client.paginate(endpoint, query={"pagelen": page_size}):
        variables.append(entry)
        if len(variables) >= count:
            break
    return variables
+ """ + if not _is_positive_int(count): + raise ValueError(f"count must be a positive int, got {count!r}") + if branch is not None and (not isinstance(branch, str) or not branch.strip()): + raise ValueError( + f"branch must be a non-empty, non-whitespace string when provided, " + f"got {branch!r}" + ) + + pagelen = min(count, _BITBUCKET_MAX_PAGELEN) + if branch is None: + path = f"{repo_path(workspace, repo)}/commits" + else: + encoded = quote(branch.strip(), safe="") + path = f"{repo_path(workspace, repo)}/commits/{encoded}" + + out: list[dict[str, Any]] = [] + for c in client.paginate(path, query={"pagelen": pagelen}): + out.append(c) + if len(out) >= count: + break + return out diff --git a/docs/img/social-preview.png b/docs/img/social-preview.png new file mode 100644 index 0000000..d1cd18e Binary files /dev/null and b/docs/img/social-preview.png differ diff --git a/git_ops.py b/git_ops.py new file mode 100644 index 0000000..3da8d8b --- /dev/null +++ b/git_ops.py @@ -0,0 +1,713 @@ +""" +git_ops — lightweight git wrappers used by the MCP server. + +The agent needs git context to do its job: "what's my current branch?", +"is the working tree clean?", "what's the workspace/repo from origin?", +"what did I commit recently?", "what's uncommitted?". These wrappers +shell out to `git` with the safe-to-test `runner=` injection seam from +`bb_api.detect_repo` so the test suite never touches a real subprocess. + +Stdlib-only on purpose, same as bb_api: keeps the MCP server's bootstrap +fast and minimises the supply-chain surface. + +Public surface (all functions accept an optional `path` defaulting to the +current working directory): + + git_current_branch(path?) -> str + git_status(path?) -> dict + git_remote_repo(path?) -> (workspace, repo) + git_recent_commits(path?, *, count=10) -> list[dict] + git_uncommitted_changes(path?) 
-> dict + +Errors raise `GitOpError` with the failing command's stderr so callers can +surface a useful message rather than guessing at the failure mode. +""" + +from __future__ import annotations + +import os +import re +import subprocess +from typing import Any + +from bb_api import parse_remote_url + + +# Regex for redacting URL-embedded credentials before they land in error +# messages. Matches the `user:token@` (or `user@`) shape in any URL and +# replaces with `[redacted]@`. Used in any GitOpError message that +# echoes a remote URL — we'd rather lose the auth detail than leak a +# token into the MCP agent's context / downstream logs. +# +# `[^/]+` (excludes only `/`, allows `@` inside) so a password +# containing a literal `@` (legal in RFC 3986 syntax) is greedy-matched +# up to the LAST `@` before the path. The previous `[^/@]+@` shape +# would have stopped at the first `@`, leaking the tail of the password +# (e.g. `https://user:p@ss@host/...` → `https://[redacted]@ss@host/...`). +_URL_CRED_PATTERN = re.compile(r"://[^/]+@") + + +def _redact_url_creds(url: str) -> str: + """Strip `user:token@` from a URL before echoing it in error text.""" + return _URL_CRED_PATTERN.sub("://[redacted]@", url) + +# Subprocess defaults applied to every `git` call: +# +# timeout=30s — a wedged git (stuck on a credential-helper prompt, a +# held `.git/index.lock`, an NFS mount whose server went away) would +# otherwise hang the MCP server thread forever with no recovery path. +# +# encoding="utf-8" — `text=True` alone defers to +# locale.getpreferredencoding(), which on minimal Docker / cron / +# systemd contexts is ASCII or C, blowing up with UnicodeDecodeError +# on a UTF-8 filename or author name BEFORE we can wrap as GitOpError. +# +# errors="replace" — never crash on a non-UTF-8 byte; substitute U+FFFD +# and keep going. The caller would rather see a single replacement +# character than an exception inside subprocess.run. 
_GIT_SUBPROCESS_TIMEOUT = 30.0

# `-c color.ui=never` injected into every git invocation. A developer
# with `color.ui = always` in ~/.gitconfig forces git to emit ANSI
# escape sequences even when stdout is a pipe — the MCP agent (and
# any other consumer) would see `\x1b[31m...\x1b[m` garbage in diffs
# and log output. Disabling color at the command level overrides the
# config.
_GIT_NO_COLOR = ["-c", "color.ui=never"]


# Sentinel returncode for errors where git produced no usable exit
# status of its own: parse failures (git exited 0 but the output made no
# sense), timeouts, and launch failures that fit neither 126 nor 127.
# Picked at -1000 to stay outside Python's signal-killed convention: a
# subprocess child killed by signal N has returncode = -N (e.g. -1 for
# SIGHUP, -9 for SIGKILL, -15 for SIGTERM). Callers branching on
# `err.returncode == GIT_PARSE_ERROR_RETURNCODE` would otherwise
# misclassify a SIGHUP-killed git as a parse failure.
GIT_PARSE_ERROR_RETURNCODE = -1000


class GitOpError(RuntimeError):
    """Raised when a `git` invocation fails or returns unparseable output.

    A separate exception class from `BBApiError` so MCP tools can render
    "git failure" vs "Bitbucket failure" differently.

    Attributes set by `__init__`:
      command     the argv that was (or would have been) executed
      returncode  see below
      stderr      the full stderr / diagnostic text (the exception
                  message only carries the first 500 stripped characters)

    `returncode` values produced in this module:
      > 0    git's own exit status, or the shell-style 126 / 127 used
             for permission / not-found launch failures
      -1000  `GIT_PARSE_ERROR_RETURNCODE`: parse failure, timeout, or
             another launch failure — there is no real exit status
      other negatives
             passed through from the runner; `subprocess` reports -N
             for a child killed by signal N

    Callers that want the sentinel must compare against
    `GIT_PARSE_ERROR_RETURNCODE` itself; "negative" alone also covers
    signal-killed children.
    """

    def __init__(self, command: list[str], returncode: int, stderr: str):
        super().__init__(
            f"git {' '.join(command[1:])!r} failed (exit {returncode}): {stderr.strip()[:500]}"
        )
        self.command = command
        self.returncode = returncode
        self.stderr = stderr


def _run_git(
    args: list[str],
    *,
    path: str | os.PathLike[str] | None = None,
    runner: Any = subprocess,
) -> str:
    """Run `git <args>` and return its stdout as text.

    Args:
        args: git arguments, without the leading `git`.
        path: working directory for the command; None means the
            process's current directory.
        runner: object exposing a `subprocess.run`-compatible `run`
            callable. Tests pass a fake here instead of monkey-patching
            the module.

    Returns:
        The captured stdout, decoded as UTF-8 with undecodable bytes
        replaced by U+FFFD.

    Raises:
        GitOpError: on a non-zero exit, a timeout, or any OSError raised
            while launching the process. No raw OSError /
            TimeoutExpired escapes.

    Every call is wrapped with:
      - `git -c color.ui=never` so ANSI escapes never leak into diffs
        / log output regardless of the user's gitconfig.
      - `timeout=_GIT_SUBPROCESS_TIMEOUT` so a wedged git can't hang
        the MCP server.
      - `encoding="utf-8", errors="replace"` so non-ASCII filenames
        / author names don't trip the locale-default decoder in
        minimal-environment containers.

    NOTE(review): text mode also enables universal-newline translation,
    so `\\r` and `\\r\\n` in git's output reach callers as `\\n` (diffs of
    CRLF files are normalised; a lone `\\r` becomes a record break).
    Capturing bytes and decoding here would avoid that, but it changes
    the kwargs the injected runner sees, so it is left as-is.
    """
    cmd = ["git", *_GIT_NO_COLOR, *args]
    cwd = str(path) if path is not None else None
    # Environment hardening (belt + suspenders alongside the timeout):
    #   GIT_TERMINAL_PROMPT=0 — git itself refuses to prompt for input,
    #     so a missing credential fails immediately with a clear error
    #     instead of wedging on a hidden prompt.
    #   GIT_ASKPASS="" — disables any GUI askpass helper that would
    #     otherwise pop up out-of-band and block the subprocess.
    git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0", "GIT_ASKPASS": ""}
    try:
        result = runner.run(
            cmd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            cwd=cwd,
            check=False,
            timeout=_GIT_SUBPROCESS_TIMEOUT,
            # stdin=DEVNULL turns any prompt into an immediate EOF
            # instead of letting git read from whatever stdin the
            # MCP server inherited.
            stdin=subprocess.DEVNULL,
            env=git_env,
        )
    except FileNotFoundError as e:
        # subprocess raises FileNotFoundError for two distinct cases.
        # Disambiguate so the agent sees the actual cause:
        #   - Missing cwd directory: e.filename is the cwd path
        #   - Missing git binary: e.filename is the executable name (git)
        if cwd is not None and getattr(e, "filename", None) == cwd:
            raise GitOpError(
                cmd, 127, f"path does not exist: {cwd}"
            ) from e
        raise GitOpError(cmd, 127, "git executable not found on PATH") from e
    except NotADirectoryError as e:
        # cwd is a path that exists but isn't a directory (e.g. a regular
        # file).
        raise GitOpError(
            cmd, 127, f"path is not a directory: {getattr(e, 'filename', cwd) or cwd!r}"
        ) from e
    except PermissionError as e:
        # cwd unreadable / git binary lacks +x. e.filename indicates which.
        raise GitOpError(
            cmd, 126, f"permission denied: {getattr(e, 'filename', cwd) or 'git'!r}"
        ) from e
    except OSError as e:
        # Every other launch failure (over-long or looping cwd path,
        # descriptor / memory exhaustion while spawning, ...). Wrapped so
        # the GitOpError-only contract holds; the process never ran, so
        # the sentinel is used rather than a made-up exit status.
        raise GitOpError(
            cmd,
            GIT_PARSE_ERROR_RETURNCODE,
            f"could not launch git: {e}",
        ) from e
    except subprocess.TimeoutExpired as e:
        # The git process never exited, so there is no real returncode
        # to surface — use the sentinel (GIT_PARSE_ERROR_RETURNCODE).
        raise GitOpError(
            cmd,
            GIT_PARSE_ERROR_RETURNCODE,
            f"git invocation timed out after {_GIT_SUBPROCESS_TIMEOUT}s",
        ) from e

    if result.returncode != 0:
        raise GitOpError(cmd, result.returncode, result.stderr or "")
    return result.stdout


# ---------------------------------------------------------------------------
# Current branch
# ---------------------------------------------------------------------------


def git_current_branch(
    path: str | os.PathLike[str] | None = None,
    *,
    runner: Any = subprocess,
) -> str:
    """Return the current branch name.

    Runs `git rev-parse --abbrev-ref HEAD`. A detached HEAD yields the
    literal string `"HEAD"`.

    Raises:
        GitOpError: if git fails, or (with the sentinel returncode) if
            git succeeds but prints an empty name.
    """
    out = _run_git(
        ["rev-parse", "--abbrev-ref", "HEAD"], path=path, runner=runner
    )
    branch = out.strip()
    if not branch:
        # Include the same `-c color.ui=never` prefix that _run_git
        # actually executed, so err.command reflects the real
        # invocation when a caller introspects the error.
        raise GitOpError(
            ["git", *_GIT_NO_COLOR, "rev-parse", "--abbrev-ref", "HEAD"],
            GIT_PARSE_ERROR_RETURNCODE,
            "git returned empty branch name",
        )
    return branch
# ---------------------------------------------------------------------------
# Status (branch, clean/dirty, ahead/behind, file lists)
# ---------------------------------------------------------------------------


def _is_ascii_digits(value: str) -> bool:
    """True iff `value` is one or more characters from 0-9.

    `str.isdigit()` alone also accepts characters such as superscript
    digits that `int()` rejects, so it cannot guard an `int()` call.
    """
    return value.isascii() and value.isdigit()


def _parse_status_porcelain_v2(text: str) -> dict[str, Any]:
    """Parse `git status --porcelain=v2 --branch` output into a structured
    dict. The format is documented in `git help status` under
    "Porcelain Format Version 2".

    Header lines start with `#`:
        # branch.oid <commit>
        # branch.head <branch-name>           (or "(detached)")
        # branch.upstream <upstream-name>     (optional)
        # branch.ab +<ahead> -<behind>        (optional, when upstream is set)

    Then per-file lines:
        1 <XY> ...  -> tracked, ordinary changes (X=staged, Y=worktree)
        2 <XY> ...  -> tracked, renamed/copied
        u <XY> ...  -> unmerged
        ? <path>    -> untracked
        ! <path>    -> ignored (skipped here)

    Malformed lines are skipped, never raised on. Returns the dict shape
    documented on `git_status`, including the `*_omitted` counters.
    """
    out: dict[str, Any] = {
        "branch": None,
        "upstream": None,
        "ahead": 0,
        "behind": 0,
        "clean": True,
        "staged": [],
        "modified": [],
        "untracked": [],
        "unmerged": [],
    }
    # The relative order of branch.oid / branch.head is not relied on:
    # unborn state is tracked with a flag and applied after the loop so
    # it wins regardless of order.
    is_unborn = False
    # split("\n") rather than splitlines(): splitlines() also breaks on
    # \v / \f / U+0085 / U+2028 / U+2029 (and \r), which would fragment
    # one logical record into pieces that each fail the per-line guards
    # and get silently dropped.
    for line in text.split("\n"):
        if not line:
            continue
        if line.startswith("# branch.head "):
            branch = line[len("# branch.head ") :].strip()
            # Porcelain v2 emits "(detached)" for detached HEAD; the
            # `git rev-parse --abbrev-ref HEAD` path emits the literal
            # string "HEAD" for the same state. Normalise to "HEAD" here
            # so the two functions agree.
            out["branch"] = "HEAD" if branch == "(detached)" else branch
        elif line.startswith("# branch.oid "):
            # On a freshly `git init`'d repo with no commits, branch.oid
            # is "(initial)" — flag for the end-of-loop normalisation.
            oid = line[len("# branch.oid ") :].strip()
            if oid == "(initial)":
                is_unborn = True
        elif line.startswith("# branch.upstream "):
            out["upstream"] = line[len("# branch.upstream ") :].strip()
        elif line.startswith("# branch.ab "):
            # Format: "# branch.ab +N -M". Validate strictly: exactly
            # "+<ascii digits> -<ascii digits>". The digit check must
            # accept only what int() accepts, otherwise a malformed line
            # would raise ValueError instead of being ignored; it also
            # rejects double-sign smuggling such as "+-3".
            parts = line[len("# branch.ab ") :].split()
            if (
                len(parts) == 2
                and parts[0].startswith("+")
                and parts[1].startswith("-")
                and _is_ascii_digits(parts[0][1:])
                and _is_ascii_digits(parts[1][1:])
            ):
                # Both values are validated before either is stored, so
                # a half-malformed line never updates just one of them.
                out["ahead"] = int(parts[0][1:])
                out["behind"] = int(parts[1][1:])
        elif line.startswith("1 "):
            # Ordinary tracked file. Format:
            #   1 XY <sub> <mH> <mI> <mW> <hH> <hI> <path>
            # 9 space-separated tokens; maxsplit keeps spaces inside the
            # path intact.
            tokens = line.split(" ", 8)
            if len(tokens) < 9:
                continue
            xy, path = tokens[1], tokens[8]
            if len(xy) != 2:
                continue  # malformed XY field
            staged_status, worktree_status = xy[0], xy[1]
            if staged_status != ".":
                out["staged"].append(path)
            if worktree_status != ".":
                out["modified"].append(path)
        elif line.startswith("2 "):
            # Renamed/copied tracked file. Format:
            #   2 XY <sub> <mH> <mI> <mW> <hH> <hI> <X><score> <new-path>\t<orig-path>
            # 10 space-separated tokens; the new path is tab-separated
            # from the original path. Keep only the new path.
            tokens = line.split(" ", 9)
            if len(tokens) < 10:
                continue
            xy, path_field = tokens[1], tokens[9]
            if len(xy) != 2:
                continue
            path = path_field.split("\t", 1)[0]
            staged_status, worktree_status = xy[0], xy[1]
            if staged_status != ".":
                out["staged"].append(path)
            if worktree_status != ".":
                out["modified"].append(path)
        elif line.startswith("u "):
            # Unmerged. Format:
            #   u XY <sub> <m1> <m2> <m3> <mW> <h1> <h2> <h3> <path>
            tokens = line.split(" ", 10)
            if len(tokens) >= 11:
                xy = tokens[1]
                if len(xy) != 2:
                    continue
                out["unmerged"].append(tokens[10])
        elif line.startswith("? "):
            # Untracked. Skip an empty remainder, which would otherwise
            # append "" and flip clean=False with a phantom entry.
            path = line[2:]
            if path:
                out["untracked"].append(path)
        # `! ignored` and any other prefixes are ignored intentionally.

    if is_unborn:
        # Override any branch.head value (the would-be branch name,
        # e.g. "main") with the HEAD sentinel: this is not yet a
        # regular branch.
        out["branch"] = "HEAD"
    out["clean"] = (
        not out["staged"]
        and not out["modified"]
        and not out["untracked"]
        and not out["unmerged"]
    )
    # Cap each file-list field and surface the omitted count in a
    # sibling field, so a repo with a huge number of untracked files
    # doesn't send all of them across MCP.
    for key in ("staged", "modified", "untracked", "unmerged"):
        out[key], out[f"{key}_omitted"] = _truncated_path_list(out[key])
    return out


def git_status(
    path: str | os.PathLike[str] | None = None,
    *,
    runner: Any = subprocess,
) -> dict[str, Any]:
    """Return a structured snapshot of the working-tree state.

    Returned dict shape:

        {
          "branch":     "feat/widget" | "HEAD" (detached or unborn),
          "upstream":   "origin/feat/widget" | None,
          "ahead":      0,
          "behind":     0,
          "clean":      True/False,
          "staged":     [path, ...],
          "modified":   [path, ...],
          "untracked":  [path, ...],
          "unmerged":   [path, ...],
          "staged_omitted":    0,
          "modified_omitted":  0,
          "untracked_omitted": 0,
          "unmerged_omitted":  0,
        }

    `clean` is True iff there are no staged, modified, untracked, or
    unmerged entries. `ahead`/`behind` are zero when no upstream is set
    or when the branch is in sync.

    `branch` is the literal string "HEAD" for both detached and unborn
    state. For detached HEAD this matches git_current_branch.
    NOTE(review): on an unborn branch `git rev-parse --abbrev-ref HEAD`
    is expected to exit non-zero, in which case git_current_branch
    raises GitOpError rather than returning "HEAD" — confirm before
    relying on the two agreeing there.

    Each file-list field is capped at `_MAX_PATH_LIST` entries; the
    sibling `*_omitted` field carries the count of entries that were
    dropped (0 when the list fits under the cap). The list itself
    contains only real paths — no sentinel marker.

    Known limitation: pathnames that git quotes (newlines, tabs,
    double-quotes, control characters) are returned in git's C-quoted
    form because the line-oriented porcelain=v2 output is used.
    Switching to `-z` + NUL-split would be the robust fix. The same
    limitation applies to git_uncommitted_changes(), which parses
    `git ls-files` output in line-oriented mode.

    Raises:
        GitOpError: if the underlying `git status` invocation fails.
    """
    text = _run_git(
        ["status", "--porcelain=v2", "--branch", "--untracked-files=normal"],
        path=path,
        runner=runner,
    )
    return _parse_status_porcelain_v2(text)
# Without a cap, an agent that confabulates `count=1_000_000` would
# silently pull a million commit records into memory and back across
# the MCP boundary. 1000 is generous for any "show me recent activity"
# workflow.
_MAX_LOG_COUNT = 1000


def git_recent_commits(
    path: str | os.PathLike[str] | None = None,
    *,
    count: int = 10,
    ref: str = "HEAD",
    runner: Any = subprocess,
) -> list[dict[str, Any]]:
    """Return up to `count` of the newest commits reachable from `ref`.

    Each entry is a dict with, in this order:

        "sha"      full commit hash (`%H`)
        "short"    abbreviated hash as git chooses to print it (`%h`)
        "subject"  commit subject line (`%s`)
        "author"   author name (`%an`)
        "date"     strict ISO 8601 author date (`%aI`)

    Fields are delimited with `_LOG_FIELD_SEP`. A record that does not
    split into exactly five fields (for instance because a field itself
    contains the separator) or whose hash is empty is dropped silently.

    Raises:
        ValueError: `count` is not an int in 1..`_MAX_LOG_COUNT` (bools
            are rejected), or `ref` is not a non-blank string, or `ref`
            starts with `-` after stripping.
        GitOpError: the `git log` invocation failed.
    """
    count_ok = (
        isinstance(count, int) and not isinstance(count, bool) and count >= 1
    )
    if not count_ok:
        raise ValueError(f"count must be a positive int, got {count!r}")
    if count > _MAX_LOG_COUNT:
        raise ValueError(f"count must be <= {_MAX_LOG_COUNT}, got {count!r}")
    if not (isinstance(ref, str) and ref.strip()):
        raise ValueError(f"ref must be a non-empty string, got {ref!r}")
    # A leading '-' would let an agent-supplied ref act as a git option
    # (`--all`, `-h`, ...). The `--` terminator in the command closes the
    # same hole structurally; this check just gives a clearer error.
    revision = ref.strip()
    if revision[:1] == "-":
        raise ValueError(
            f"ref must not start with '-' (would be parsed as a git option), got {ref!r}"
        )

    field_names = ("sha", "short", "subject", "author", "date")
    placeholders = ("%H", "%h", "%s", "%an", "%aI")
    log_format = _LOG_FIELD_SEP.join(placeholders)
    raw_log = _run_git(
        ["log", f"--pretty=format:{log_format}", f"-n{count}", revision, "--"],
        path=path,
        runner=runner,
    )

    commits: list[dict[str, Any]] = []
    # Records are split on "\n" only; splitlines() would additionally
    # break on \v, \f, U+0085, U+2028 and U+2029 inside a field.
    for record in raw_log.split("\n"):
        values = record.split(_LOG_FIELD_SEP)
        # An empty record splits to [""], so it fails the length test
        # like any other malformed line; values[0] is the full hash.
        if len(values) != len(field_names) or not values[0]:
            continue
        commits.append(dict(zip(field_names, values)))
    return commits
# Cap on the size of each diff string returned to the caller. A stray
# multi-GB generated file (binary blob, vendored deps, ML model
# checkpoint) accidentally staged in a monorepo would otherwise be
# shipped across the MCP boundary as a single string. Note the cap only
# bounds what is RETURNED: the full git output has already been captured
# in memory by the time it is applied. 1 MiB per diff is generous for
# any realistic code change, and an agent receiving a truncated diff can
# ask for a `--stat` summary instead.
_MAX_DIFF_BYTES = 1024 * 1024
_DIFF_TRUNCATION_MARKER = (
    "\n\n[... diff truncated by bb MCP server: exceeded "
    f"{_MAX_DIFF_BYTES} bytes. Use `git diff --stat` for a summary.]\n"
)

# Maximum number of path entries returned per file-list field
# (staged / modified / untracked / unmerged + git_uncommitted_changes'
# untracked_files). A repo that forgot to gitignore node_modules /
# .venv / target/ commonly has hundreds of thousands of untracked
# paths, and JSON serialisation across MCP would amplify the cost.
_MAX_PATH_LIST = 10_000


def _truncated_path_list(paths: list[str]) -> tuple[list[str], int]:
    """Cap a path list at `_MAX_PATH_LIST` entries.

    Returns `(kept, omitted_count)`. `kept` is `paths` itself when it
    fits, otherwise a new list holding the first `_MAX_PATH_LIST`
    entries. It contains ONLY real paths — no sentinel string — so
    callers iterating it with `os.stat` / `Path.exists()` don't trip on
    a non-path entry; truncation is reported through the count.
    """
    if len(paths) <= _MAX_PATH_LIST:
        return paths, 0
    return paths[:_MAX_PATH_LIST], len(paths) - _MAX_PATH_LIST


def _cap_diff(text: str) -> str:
    """Return `text`, truncated to at most `_MAX_DIFF_BYTES` UTF-8 bytes.

    Text that fits is returned unchanged. Otherwise the result is the
    longest prefix of whole characters that fits in the cap, followed by
    `_DIFF_TRUNCATION_MARKER` so the caller knows what happened.

    The size is measured in encoded bytes, not characters: UTF-8 byte
    count is always >= char count, so `len(text) <= cap` only proves the
    text fits when the text is pure ASCII.
    """
    # ASCII fast path: chars == bytes, so the cheap len check is sound.
    if text.isascii() and len(text) <= _MAX_DIFF_BYTES:
        return text
    encoded = text.encode("utf-8")
    if len(encoded) <= _MAX_DIFF_BYTES:
        return text
    # Slice at the byte cap, then decode. The slice may end in the
    # middle of a multibyte character; errors="ignore" drops that
    # partial tail. ("replace" would substitute U+FFFD, which is itself
    # 3 bytes — pushing the payload back over the cap and inserting a
    # character that was never in the diff.)
    truncated = encoded[:_MAX_DIFF_BYTES].decode("utf-8", errors="ignore")
    return truncated + _DIFF_TRUNCATION_MARKER


def git_uncommitted_changes(
    path: str | os.PathLike[str] | None = None,
    *,
    runner: Any = subprocess,
) -> dict[str, Any]:
    """Return everything that hasn't been committed yet.

    Returned dict:

        {
          "staged_diff":             "<git diff --cached output>",
          "working_diff":            "<git diff output>",
          "untracked_files":         [path, ...],
          "untracked_files_omitted": 0,
        }

    Both diff strings may be `""` and the untracked list may be `[]`
    when the working tree is clean. Diffs are returned as unified-diff
    text so callers can either show them verbatim or parse them further.

    Each diff is capped at `_MAX_DIFF_BYTES` (1 MiB) plus a small
    truncation marker; the marker tells the caller (typically the MCP
    agent) to fall back to `git diff --stat` or a path-narrowed diff.
    The `untracked_files` list is capped at `_MAX_PATH_LIST` entries;
    `untracked_files_omitted` carries the count of paths that were
    dropped (0 when the list fits).

    Raises:
        GitOpError: if any of the three git invocations fails.
    """
    staged_diff = _cap_diff(
        _run_git(["diff", "--cached"], path=path, runner=runner)
    )
    working_diff = _cap_diff(_run_git(["diff"], path=path, runner=runner))
    untracked_text = _run_git(
        ["ls-files", "--others", "--exclude-standard"],
        path=path,
        runner=runner,
    )
    # One path per "\n"-terminated line; blank pieces (such as the one
    # after the final newline) are discarded.
    untracked = [line for line in untracked_text.split("\n") if line]
    untracked_capped, untracked_omitted = _truncated_path_list(untracked)
    return {
        "staged_diff": staged_diff,
        "working_diff": working_diff,
        "untracked_files": untracked_capped,
        "untracked_files_omitted": untracked_omitted,
    }
+ +The MCP agent gets tools covering: + - Pipelines: list, show, steps, trigger, stop, logs + - Pull requests: list, show, activity, create, approve, unapprove, merge, + decline, diff, comments-list, comment-add + - Repos / branches / vars / downloads / commits + - Git context: current_branch, status, remote_repo, recent_commits, + uncommitted_changes + - Meta: whoami + +Every Bitbucket tool accepts an optional `repo` argument: + - "" (empty) → auto-detect from `git remote get-url origin` in cwd + - "myrepo" → use BB_WORKSPACE from config + "myrepo" + - "acme/repo" → use "acme" workspace + "repo" slug (overrides config) + +Run as a subprocess (stdio transport): + python3 mcp_server.py + +The script self-bootstraps a venv under `$XDG_DATA_HOME/bitbucket-cli/venv` +(default `~/.local/share/bitbucket-cli/venv`) on first run, installs the +`mcp` package into it, then re-execs under that venv. Any python3 on +PATH that can run `python3 -m venv` works as the launcher (must be 3.10+; +on macOS prefer Homebrew or pyenv over Apple's bundled 3.9 at +/usr/bin/python3). The venv location is durable (survives reboot), so +subsequent launches re-exec into the existing venv without rebuilding. 
def _xdg_data_home() -> Path:
    """Return the base directory for per-user application data.

    Uses `$XDG_DATA_HOME` when it is set to an absolute path; otherwise
    falls back to `~/.local/share`. An empty or relative value is
    ignored, as the XDG Base Directory spec requires — a relative value
    would also make the venv location (and the interpreter path later
    handed to `os.execv`) depend on the launch directory.

    Called once at import time to compute `_VENV_DIR`, so the location
    stays fixed for the rest of the bootstrap.
    """
    explicit = os.environ.get("XDG_DATA_HOME")
    if explicit and os.path.isabs(explicit):
        return Path(explicit)
    return Path.home() / ".local" / "share"
# Why a sentinel rather than "does _VENV_PY exist?": if pip is
# interrupted mid-run (Ctrl-C, OOM kill, disk full), _VENV_PY exists but
# `mcp` doesn't. Keying on the interpreter alone would make every later
# launch skip the install, re-exec into the broken venv, and die on
# `from mcp.server.fastmcp import FastMCP`.
_VENV_READY = _VENV_DIR / ".bbenv-ready"
# Pin to mcp>=1.0,<2 so a breaking mcp 2.x release doesn't silently
# install on a fresh-machine bootstrap (or after a manual `rm` of the
# venv) and break every subsequent launch.
# NOTE(review): said to match the pyproject.toml [mcp] extra — keep the
# two in sync.
_VENV_DEPS = ("mcp>=1.0,<2",)  # No heavy deps (no torch / sentence-transformers).
_VENV_MIN_PY = (3, 10)  # bb_api uses PEP 604 unions; mcp also needs >=3.10


def _find_builder_python() -> str:
    """Return a python3 executable suitable for building the venv.

    The running interpreter is returned as-is when it already satisfies
    `_VENV_MIN_PY`. Otherwise a fixed list of pyenv / Homebrew / PATH /
    system locations is probed in order, and the first one that exists
    and reports a new-enough version wins.

    Raises:
        RuntimeError: no candidate satisfies `_VENV_MIN_PY`.
    """
    import shutil

    if sys.version_info >= _VENV_MIN_PY:
        return sys.executable
    candidates = [
        "/opt/homebrew/opt/pyenv/shims/python3",
        os.path.expanduser("~/.pyenv/shims/python3"),
        shutil.which("python3"),  # may be None; filtered below
        "/opt/homebrew/bin/python3",
        "/usr/local/bin/python3",
        "/usr/bin/python3",
    ]
    # One-liner run inside each candidate: exit status 0 iff that
    # interpreter's version tuple is >= _VENV_MIN_PY.
    probe = (
        "import sys; "
        f"sys.exit(0 if sys.version_info >= {_VENV_MIN_PY} else 1)"
    )
    seen: set[str] = set()
    for cand in candidates:
        # Skip None / duplicates / paths that don't exist.
        if not cand or cand in seen or not os.path.exists(cand):
            continue
        seen.add(cand)
        try:
            # Output is discarded; only the exit status matters.
            subprocess.check_call(
                [cand, "-c", probe],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            return cand
        except (subprocess.CalledProcessError, OSError):
            # Too old (non-zero exit) or not runnable: try the next one.
            continue
    raise RuntimeError(
        f"No python3 >= {_VENV_MIN_PY[0]}.{_VENV_MIN_PY[1]} found to build "
        f"{_VENV_DIR}; install one (e.g. via pyenv or `brew install python`) "
        f"and retry."
    )


def _pip_install_or_diagnose(args: list[str]) -> None:
    """Run a pip command line and turn a failure into a readable error.

    `args` is the complete argv (interpreter, `-m pip install`, ...).
    Output is captured rather than inherited, so nothing pip prints
    reaches this process's stdout. On a non-zero exit the captured
    stderr (or stdout, if stderr is empty) is inlined into the raised
    error, so the user sees pip's real diagnostic (network blip, version
    yank, SSL cert, proxy) instead of a bare `CalledProcessError`.

    Raises:
        RuntimeError: pip exited non-zero.
    """
    try:
        subprocess.run(args, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # Prefer stderr; fall back to stdout; never None.
        diag = (e.stderr or e.stdout or "").strip()
        raise RuntimeError(
            f"[bb-mcp] pip install failed (exit {e.returncode}):\n{diag}"
        ) from e


def _bootstrap_venv() -> None:
    """Create the venv on first run, install deps, re-exec under it.

    Idempotent: a fully-bootstrapped venv (sentinel file present)
    re-execs immediately. A partially-bootstrapped venv (interpreter
    exists but sentinel doesn't — e.g. a previous pip install was
    Ctrl-C'd or OOM-killed) gets the pip steps retried without
    recreating the venv.

    When the current interpreter is not the venv's, this function does
    not return: `os.execv` replaces the process. When already running
    inside the venv it returns None.

    Raises:
        RuntimeError: no suitable builder python, or a pip step failed.
        subprocess.CalledProcessError: `python -m venv` itself failed.
    """
    if not _VENV_READY.exists():
        builder = _find_builder_python()
        # Log to stderr so MCP stdio transport isn't corrupted.
        print(
            f"[bb-mcp] bootstrapping {_VENV_DIR} with {builder}",
            file=sys.stderr,
        )
        # Make sure the parent directory exists up front; on a fresh
        # machine `~/.local/share/bitbucket-cli` won't be there yet.
        _VENV_DIR.parent.mkdir(parents=True, exist_ok=True)
        # Only create the venv if its interpreter isn't already there (a
        # previous half-finished bootstrap may have left it in place).
        if not _VENV_PY.exists():
            subprocess.check_call([builder, "-m", "venv", str(_VENV_DIR)])
        # Upgrade pip first, then install the pinned dependencies.
        _pip_install_or_diagnose(
            [str(_VENV_PY), "-m", "pip", "install",
             "--no-cache-dir", "--upgrade", "pip"]
        )
        _pip_install_or_diagnose(
            [str(_VENV_PY), "-m", "pip", "install",
             "--no-cache-dir", *_VENV_DEPS]
        )
        # Sentinel last — any earlier failure leaves it absent so the
        # next launch retries the install.
        _VENV_READY.touch()

    # "Are we already running under the bootstrap venv?" is decided by
    # comparing sys.prefix with the venv dir. BOTH sides are resolved
    # first: symlinked prefixes (e.g. macOS's `/tmp -> /private/tmp`)
    # would otherwise make the same logical path compare unequal and
    # trigger an endless re-exec loop.
    if Path(sys.prefix).resolve() != _VENV_DIR.resolve():
        # Pass the script as an absolute path so the re-exec'd
        # interpreter can find it regardless of the working directory.
        os.execv(
            str(_VENV_PY),
            [str(_VENV_PY), str(Path(__file__).resolve()), *sys.argv[1:]],
        )


# Test-mode escape hatch: setting BB_MCP_SKIP_BOOTSTRAP=1 in the environment
# skips the venv bootstrap AND substitutes a minimal FastMCP stub for the
# import below. This lets the pytest suite exercise tool wiring + result-dict
# shapes without pulling in `mcp`. Production (the actual MCP server
# transport) must NEVER set this — without the real FastMCP, the server
# doesn't serve.
_MCP_SKIP_BOOTSTRAP = os.getenv("BB_MCP_SKIP_BOOTSTRAP") == "1"

if not _MCP_SKIP_BOOTSTRAP:
    # Production path: make sure we are running inside the managed venv,
    # then pull in the real FastMCP.
    _bootstrap_venv()
    try:
        from mcp.server.fastmcp import FastMCP  # type: ignore[import-not-found]
    except ImportError as exc:
        # The ready-sentinel exists but `mcp` cannot be imported — a
        # manual `pip uninstall`, a partial cleanup that removed the
        # package directory but kept the sentinel, or an image-layer
        # accident. Spell out the recovery steps instead of leaving the
        # user with a bare ModuleNotFoundError.
        recovery_hint = (
            f"[bb-mcp] FastMCP import failed even though {_VENV_READY} "
            f"says the venv is ready ({exc}). The mcp package was probably "
            f"removed out-of-band. Recover with:\n"
            f" rm {_VENV_READY}\n"
            f"…then relaunch — bootstrap will reinstall. Or nuke the "
            f"whole venv with `rm -rf {_VENV_DIR}` if state is corrupt."
        )
        raise ImportError(recovery_hint) from exc
else:
    # Test path: a tiny stand-in for FastMCP. `@mcp.tool()` hands the
    # function back untouched so tests can call the tool directly, and
    # `.run()` refuses to start so a test can never launch a server by
    # accident.
    class FastMCP:  # type: ignore[no-redef]
        """Minimal FastMCP replacement used when BB_MCP_SKIP_BOOTSTRAP=1."""

        def __init__(self, name: str) -> None:
            self.name = name
            # Registered tool functions, keyed by function name.
            self._tools: dict[str, Any] = {}

        def tool(self, *args: Any, **kwargs: Any):  # noqa: ARG002
            """Return a decorator that records the function and returns it as-is."""

            def _register(fn: Any) -> Any:
                self._tools[fn.__name__] = fn
                return fn

            return _register

        def run(self) -> None:
            """Always raise: the stub cannot serve MCP traffic."""
            raise RuntimeError(
                "FastMCP stub: BB_MCP_SKIP_BOOTSTRAP is set. The MCP "
                "server cannot run in this mode; it's for unit tests only."
            )


# ---------------------------------------------------------------------------
# Path setup so sibling modules are importable when the bootstrap venv
# launches us from any cwd.
# ---------------------------------------------------------------------------

HERE = Path(__file__).resolve().parent
if str(HERE) not in sys.path:
    sys.path.insert(0, str(HERE))

import bb_api  # noqa: E402 (must come after sys.path insert)
import bb_ops  # noqa: E402
import git_ops  # noqa: E402


# ---------------------------------------------------------------------------
# Shared client + repo resolution
# ---------------------------------------------------------------------------

# Process-wide BBClient. load_config() goes to disk on each call and the
# MCP server is long-lived, so the client is built once and reused.
# Tests clear it through _reset_client_cache().
_client_cache: bb_api.BBClient | None = None


def _get_client() -> bb_api.BBClient:
    """Return the shared BBClient, building it from env / config files on
    first use. Raises BBConfigError if required keys are missing."""
    global _client_cache
    if _client_cache is not None:
        return _client_cache
    _client_cache = bb_api.BBClient(bb_api.load_config())
    return _client_cache


def _reset_client_cache() -> None:
    """Drop the cached client so the next _get_client() rebuilds it.

    Test hook only — in production the cache lives as long as the server.
    """
    global _client_cache
    _client_cache = None


def _default_repo_path() -> str:
    """Directory used for git auto-detection.

    Order of preference:
      1. the BB_DEFAULT_REPO_PATH environment variable (when non-empty)
      2. the process's current working directory

    The cwd lookup is deliberately lazy: `os.environ.get(KEY, os.getcwd())`
    would evaluate `os.getcwd()` even when the variable is set, so the
    override could not protect against a deleted cwd.
    """
    override = os.environ.get("BB_DEFAULT_REPO_PATH")
    if override:
        return override
    return os.getcwd()


def _resolve_repo(repo: str | None = "") -> tuple[bb_api.BBClient, str, str]:
    """Turn one `repo` argument into (client, workspace, repo_slug).

    Accepted shapes for `repo`:
      - "" / None      → auto-detect from `git remote get-url origin` in
                         BB_DEFAULT_REPO_PATH (or cwd); workspace and slug
                         both come from the remote URL.
      - "myrepo"       → configured workspace (BB_WORKSPACE) + "myrepo".
      - "acme/myrepo"  → workspace "acme" + slug "myrepo" (overrides
                         BB_WORKSPACE for this call).

    The whole argument and each "/"-separated part are stripped, so
    " acme/widget " and "acme/ widget" both become ("acme", "widget").
    `None` (a JSON `null`) behaves like "".

    An explicit `repo` is validated BEFORE the client is built, so a
    malformed slug on a machine without config raises ValueError rather
    than a BBConfigError that hides the real cause.

    Raises:
        ValueError: `repo` is malformed ("a/b/c", empty part, "." / "..").
        bb_api.BBConfigError: required config is missing (only after
            `repo` has passed validation).
    """
    # None → "" first; calling .strip() on None would raise AttributeError.
    slug = (repo or "").strip()

    if not slug:
        # Nothing to validate on the auto-detect path.
        client = _get_client()
        workspace, repo_slug = git_ops.git_remote_repo(path=_default_repo_path())
        return client, workspace, repo_slug

    if "/" not in slug:
        # Bare slug → configured workspace. Same `.` / `..` rule as the
        # two-part form, checked before touching config.
        if slug in (".", ".."):
            raise ValueError(
                f"repo must not be '.' or '..'; got {slug!r}"
            )
        client = _get_client()
        return client, client.config.workspace, slug

    # "workspace/slug" form: exactly two non-empty parts after stripping.
    parts = [piece.strip() for piece in slug.split("/")]
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            f"repo must be 'workspace/repo' or 'repo'; got {slug!r}"
        )
    if any(piece in (".", "..") for piece in parts):
        raise ValueError(
            f"workspace and repo must not be '.' or '..'; got {slug!r}"
        )
    workspace, repo_slug = parts
    client = _get_client()
    return client, workspace, repo_slug


# ---------------------------------------------------------------------------
# Error formatting
# ---------------------------------------------------------------------------

# Every tool returns either {"ok": True, ...result} or {"ok": False, ...error}.
# A single consistent shape lets the MCP agent branch on `ok` once and
# render the success and error paths uniformly.

# URL with embedded credentials. `[^/]+@` is greedy up to the last `@`
# before the path, so a password that itself contains `@` is still
# covered by the redaction.
_URL_CRED_PATTERN = re.compile(r"://[^/]+@")

# SCP-style remotes (`user:token@host:path`) carry no scheme, so the
# pattern above cannot see them. This matches `<user[:tok]>@` followed by
# `<host>:` at the start of the text or after any whitespace.
_SCP_CRED_PATTERN = re.compile(r"(^|\s)[^/:\s@]+(?::[^/@\s]*)?@(?=[^/\s]+:)")

# Lowercase signed-URL indicators (compared against the lowercased
# query part). Covers:
#   AWS:    X-Amz-Signature / X-Amz-Credential
#   GCP:    X-Goog-Signature / X-Goog-Credential
#   Azure:  sig= (the SAS signature parameter)
#   Plain:  Signature= (some non-AWS S3-compatible services)
#   Bearer: access_token=, api_key= (URLs that embed bearer tokens)
# Each entry keeps its trailing `=` so it reads as a parameter name
# rather than an arbitrary substring.
_SIGNED_URL_INDICATORS_LOWER = (
    "x-amz-signature=", "x-amz-credential=",
    "x-goog-signature=", "x-goog-credential=",
    "sig=", "signature=",
    "access_token=", "api_key=",
)


def _redact_url(url: str) -> str:
    """Return `url` with embedded credentials and signed parameters removed.

    Two redactions are applied:

    1. `scheme://user:token@host` userinfo becomes `[redacted]@`.
    2. If the query string — or, when there is no query, the fragment —
       contains any parameter named in `_SIGNED_URL_INDICATORS_LOWER`
       (matched case-insensitively), everything after the separator is
       replaced with a marker. The part before it is kept so the agent
       still sees which host/path was called.

    Used in `_error_dict` to defend against `pipeline_logs` / `pr_diff`
    redirect chains landing on Bitbucket's signed S3 URLs: a failure
    after the redirect can raise BBApiError(url=<signed S3 URL>), and
    that URL embeds credentials in its query.

    The fragment is checked as well because bearer tokens are commonly
    delivered there (`…/callback#access_token=…`); looking only at `?`
    let such URLs through untouched.

    Args:
        url: URL to sanitise. Falsy values ("" / None) are returned as-is.

    Returns:
        The redacted URL.
    """
    if not url:
        return url
    # `user:token@host` form (Bitbucket basic-auth-embedded URLs).
    redacted = _URL_CRED_PATTERN.sub("://[redacted]@", url)
    # Prefer the query separator; fall back to the fragment separator
    # only when the URL has no query. With a `?` present this behaves
    # exactly as a plain partition on "?" (any fragment after the query
    # is part of the inspected text and is dropped along with it).
    sep = "?" if "?" in redacted else "#"
    base, found, params = redacted.partition(sep)
    if found:
        params_lower = params.lower()
        if any(ind in params_lower for ind in _SIGNED_URL_INDICATORS_LOWER):
            redacted = f"{base}{sep}[redacted-signed-url-params]"
    return redacted


# Match ANY URL scheme (http, https, ssh, git+ssh, etc.) so SCP-style
# variants and ssh:// URLs with embedded passphrases don't slip past
# free-form-text redaction. Stops at whitespace / quote / angle-bracket /
# closing-paren — covers URLs embedded in typical log / error shapes.
_ANY_URL_PATTERN = re.compile(r"(?:[a-zA-Z][a-zA-Z0-9+.-]*)://[^\s'\"<>)]+")


def _safe_text(text: str) -> str:
    """Redact every URL-shaped substring AND SCP-style `user:tok@host:`
    forms from a free-form text field.

    Applied uniformly to every string field going into the error dict
    (message / body / stderr), so that leaking a credential through any
    one of them needs a new threat vector, not just a new field name.

    This function runs inside the tools' `except` handlers, so it must
    not raise: an exception here would replace the error being reported.
    Non-str payloads are therefore coerced instead of being passed to
    the str-only regexes (bytes are decoded as UTF-8 with replacement,
    anything else goes through `str()`).

    Args:
        text: Text to sanitise. Falsy values ("" / None) are returned
            unchanged.

    Returns:
        The redacted text.
    """
    if not text:
        return text
    if isinstance(text, (bytes, bytearray)):
        text = bytes(text).decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    # Pass 1: redact URL-scheme forms (http://, https://, ssh://, git+ssh://, ...).
    redacted = _ANY_URL_PATTERN.sub(lambda m: _redact_url(m.group(0)), text)
    # Pass 2: SCP-style (user:tok@host:path) with no scheme prefix. Group 1
    # is the leading whitespace (or empty at start of text) and is kept.
    return _SCP_CRED_PATTERN.sub(lambda m: f"{m.group(1)}[redacted]@", redacted)


# Legacy alias retained for the existing test_bbapierror_redacts_signed_s3_url
# / test_bbapierror_redacts_embedded_creds tests. Renamed forwarder.
_redact_message = _safe_text


def _error_dict(e: Exception) -> dict[str, Any]:
    """Translate any tool-side exception into a structured error dict.

    The agent sees `kind`, `message`, and (for BBApiError) the HTTP
    status + redacted URL so it can branch on `kind == "BBApiError"
    and status == 404` without parsing the message string.

    EVERY string field that could contain a URL or credential is
    redacted:
      - message (str(e) embeds URL for BBApiError)  → `_safe_text`
      - url     (BBApiError)                        → `_redact_url`
      - body    (API response can echo the redirect target URL)
                                                    → `_safe_text`
      - stderr  (GitOpError; git stderr can contain remote URLs)
                                                    → `_safe_text`

    Returns:
        `{"ok": False, "kind": ..., "message": ...}` plus
        `status` / `url` / `body` for BBApiError, or
        `returncode` / `stderr` for GitOpError.
    """
    kind = type(e).__name__
    out: dict[str, Any] = {
        "ok": False,
        "kind": kind,
        "message": _safe_text(str(e)),
    }
    if isinstance(e, bb_api.BBApiError):
        out["status"] = e.status
        out["url"] = _redact_url(e.url)
        out["body"] = _safe_text(e.body)
    elif isinstance(e, git_ops.GitOpError):
        out["returncode"] = e.returncode
        out["stderr"] = _safe_text(e.stderr)
    return out


def _error_dict_with(e: Exception, **extras: Any) -> dict[str, Any]:
    """Like `_error_dict`, plus caller-supplied correlation identifiers.

    Threads identifiers (pr_id, step_index, number, ...) into the error
    response so the agent can match fan-out failures to the requests
    that produced them.

    The structured error fields always win: an extra whose name collides
    with a key `_error_dict` already set (`ok`, `kind`, `message`,
    `status`, ...) is ignored rather than overwriting it, so a caller
    can never turn an error into `ok=True` or replace the HTTP status by
    accident.

    Args:
        e: The exception being reported.
        **extras: Additional key/value pairs to include.

    Returns:
        The `_error_dict(e)` result with non-colliding extras appended.
    """
    out = _error_dict(e)
    for key, value in extras.items():
        out.setdefault(key, value)
    return out


# Exceptions every tool wraps. Other exceptions propagate (they're
# programmer errors and should crash visibly during development).
#
# Includes:
#   - OSError covers IsADirectoryError, ConnectionResetError,
#     BlockingIOError, ChildProcessError — paths git_ops._run_git
#     doesn't wrap explicitly (only FileNotFoundError /
#     NotADirectoryError / PermissionError do). Also catches
#     os.getcwd() on a deleted cwd inside _default_repo_path().
#   - AttributeError covers the JSON-null-into-string-arg case where
#     an MCP client sends {"repo": null} and `.strip()` would
#     otherwise crash uncaught.
#
# Deliberately EXCLUDES TypeError — a refactor that renames a bb_ops
# kwarg should surface as an obvious dev-time crash, not a fake
# Bitbucket failure the agent reports back.
The only intentional +# TypeError raise is in bb_api._validate_query_value, which is at +# a layer no MCP wrapper drives directly. +_TOOL_EXPECTED_EXCEPTIONS = ( + bb_api.BBApiError, + bb_api.BBConfigError, + bb_ops.BBOpNotFound, + git_ops.GitOpError, + OSError, + AttributeError, + ValueError, +) + + +# --------------------------------------------------------------------------- +# MCP server +# --------------------------------------------------------------------------- + +mcp = FastMCP("bb") + + +def _opt_str(value: str | None) -> str | None: + """Normalise an MCP-string-or-null arg to a non-empty stripped string + or None. Used for optional string parameters (branch, pattern, + query, message) so that "", " ", and None all funnel to None + rather than getting inconsistently reported as different errors + by the bb_ops layer.""" + if value is None: + return None + stripped = value.strip() + return stripped or None + + +# ============================================================================= +# PIPELINE TOOLS +# ============================================================================= + + +@mcp.tool() +def pipelines_list( + repo: str = "", + count: int = 10, + branch: str = "", + sort: str = "-created_on", +) -> dict[str, Any]: + """List recent Bitbucket pipelines (most-recent first by default). + + Args: + repo: Repo slug, "workspace/slug", or "" to auto-detect from git. + count: Maximum number of pipelines to return (paginates if > 100). + branch: Optional branch filter (e.g. "main", "feat/widget"). + sort: Sort key (default "-created_on" = newest first). + "created_on" for oldest first. 
+ """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + pipelines = bb_ops.pipelines_list( + client, workspace, repo_slug, + count=count, + branch=_opt_str(branch), + sort=sort, + ) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pipelines": pipelines} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def pipeline_show(number: int, repo: str = "") -> dict[str, Any]: + """Fetch a single pipeline by build number.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + pipeline = bb_ops.pipeline_show(client, workspace, repo_slug, number) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pipeline": pipeline} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, number=number) + + +@mcp.tool() +def pipeline_steps(number: int, repo: str = "") -> dict[str, Any]: + """List the steps of a pipeline by build number.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + steps = bb_ops.pipeline_steps(client, workspace, repo_slug, number) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "steps": steps} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, number=number) + + +@mcp.tool() +def pipeline_trigger( + branch: str, + repo: str = "", + pattern: str = "", + variables: dict[str, str] | None = None, +) -> dict[str, Any]: + """Trigger a new pipeline run. + + Args: + branch: Branch ref name (e.g. "main", "feat/widget"). + repo: Repo slug, "workspace/slug", or "" to auto-detect. + pattern: Custom pipeline name (matches `custom:` entries in + bitbucket-pipelines.yml). Empty for the branch's default + pipeline. + variables: Dict of {name: value} pairs to pass as pipeline + variables. Values must be strings. + """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + # Strip the branch so " main" / "main " don't slip through to a + # 4xx with an opaque body. 
bb_ops.pipeline_trigger checks + # `if not branch` (catches empty) but not whitespace-only or + # trailing whitespace — symmetric with _opt_str() everywhere + # else, but required here (cannot funnel to None). + normalised_branch = (branch or "").strip() + if not normalised_branch: + raise ValueError( + f"branch is required and must be non-empty/non-whitespace; got {branch!r}" + ) + pipeline = bb_ops.pipeline_trigger( + client, workspace, repo_slug, + branch=normalised_branch, + pattern=_opt_str(pattern), + variables=variables, + ) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pipeline": pipeline} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def pipeline_stop(number: int, repo: str = "") -> dict[str, Any]: + """Stop a running pipeline by build number.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + result = bb_ops.pipeline_stop(client, workspace, repo_slug, number) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "result": result} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, number=number) + + +@mcp.tool() +def pipeline_logs( + number: int, + step_index: int, + repo: str = "", + timeout: float = 120.0, +) -> dict[str, Any]: + """Fetch raw log text for a pipeline step (0-based step index). + + The log endpoint may return inline text or redirect to a signed S3 + URL; the underlying fetcher follows the redirect while stripping the + Bitbucket Authorization header on cross-host hops. + + Args: + number: Pipeline build number. + step_index: 0-based step position. + repo: Repo slug, "workspace/slug", or "" to auto-detect. + timeout: Per-call timeout in seconds (default 120). Bump for + pipelines with very large log payloads. 
+ """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + text = bb_ops.pipeline_logs( + client, workspace, repo_slug, number, step_index, + timeout=timeout, + ) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "step_index": step_index, + "log": text, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, number=number, step_index=step_index) + + +# ============================================================================= +# PULL REQUEST TOOLS +# ============================================================================= + + +@mcp.tool() +def prs_list(repo: str = "", state: str = "OPEN", count: int = 25) -> dict[str, Any]: + """List pull requests filtered by state. + + Args: + repo: Repo slug, "workspace/slug", or "" to auto-detect. + state: OPEN, MERGED, DECLINED, or SUPERSEDED. + count: Maximum number of PRs to return. + """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + prs = bb_ops.prs_list(client, workspace, repo_slug, state=state, count=count) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "prs": prs} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def pr_show(pr_id: int, repo: str = "") -> dict[str, Any]: + """Fetch a single pull request by ID.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + pr = bb_ops.pr_show(client, workspace, repo_slug, pr_id) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pr": pr} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_activity(pr_id: int, repo: str = "", count: int = 50) -> dict[str, Any]: + """List PR activity stream (approvals, comments, state transitions).""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + activity = bb_ops.pr_activity(client, workspace, repo_slug, pr_id, count=count) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "activity": 
activity, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_create( + title: str, + source_branch: str = "", + destination_branch: str = "main", + repo: str = "", + description: str = "", + close_source_branch: bool = True, + reviewers: list[str] | None = None, +) -> dict[str, Any]: + """Create a pull request. + + Args: + title: PR title (required). + source_branch: Source branch name. If empty/whitespace, + auto-detected via `git rev-parse --abbrev-ref HEAD`. + Detached HEAD / unborn-branch states are rejected + (git returns "HEAD" as the branch literal — not a + valid PR source). + destination_branch: Destination branch (default: "main"). + repo: Repo slug, "workspace/slug", or "" to auto-detect. + description: PR description (markdown). Empty/whitespace omitted. + close_source_branch: Delete the source branch on merge (default: True). + reviewers: Optional list of reviewer Bitbucket UUIDs (each + wrapped as `{"uuid": "..."}` in the payload). + """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + # Normalise every string arg at the boundary. `(x or "").strip()` + # handles None (JSON null), whitespace-only, and trailing/leading + # whitespace uniformly. Symmetric with pipeline_trigger's branch + # handling (round 3 fix #2) and _resolve_repo's repo handling + # (round 2). Without this, "feat/widget " posts a 404, "Hi " + # ships a title with literal trailing whitespace, etc. + source_branch = (source_branch or "").strip() + destination_branch = (destination_branch or "main").strip() or "main" + normalised_title = (title or "").strip() + normalised_description = (description or "").strip() + if not normalised_title: + raise ValueError( + f"title is required and must be non-empty/non-whitespace; got {title!r}" + ) + + # Default source_branch to the current git branch when empty — + # matches the bash `bb pr-create` behaviour. 
+ if not source_branch: + source_branch = git_ops.git_current_branch(path=_default_repo_path()) + # Reject "HEAD" regardless of whether it came from auto-detect + # or was supplied explicitly. Bitbucket would silently create a + # degenerate PR named after the literal `HEAD` ref. + if source_branch.strip() == "HEAD": + raise ValueError( + "source_branch cannot be 'HEAD' (detached HEAD / unborn " + "branch state). Pass a real branch name explicitly." + ) + pr = bb_ops.pr_create( + client, workspace, repo_slug, + title=normalised_title, + source_branch=source_branch, + destination_branch=destination_branch, + description=normalised_description, + close_source_branch=close_source_branch, + reviewers=reviewers, + ) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pr": pr} + except _TOOL_EXPECTED_EXCEPTIONS as e: + # Thread title for parallel-call correlation (e.g. agent fanning + # out one pr_create per stacked branch in a PR train). Use the + # raw title so the agent can match against what it sent. 
+ return _error_dict_with(e, title=title) + + +@mcp.tool() +def pr_approve(pr_id: int, repo: str = "") -> dict[str, Any]: + """Approve a pull request as the authenticated user.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + result = bb_ops.pr_approve(client, workspace, repo_slug, pr_id) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "approval": result, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_unapprove(pr_id: int, repo: str = "") -> dict[str, Any]: + """Remove the authenticated user's approval from a PR.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + result = bb_ops.pr_unapprove(client, workspace, repo_slug, pr_id) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "result": result, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_merge( + pr_id: int, + repo: str = "", + strategy: str = "merge_commit", + close_source_branch: bool = True, + message: str = "", +) -> dict[str, Any]: + """Merge a pull request. + + Strategies: merge_commit (default), squash, fast_forward. 
+ """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + result = bb_ops.pr_merge( + client, workspace, repo_slug, pr_id, + strategy=strategy, + close_source_branch=close_source_branch, + message=_opt_str(message), + ) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pr": result} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_decline(pr_id: int, repo: str = "") -> dict[str, Any]: + """Decline (close without merging) a pull request.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + result = bb_ops.pr_decline(client, workspace, repo_slug, pr_id) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "pr": result} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_diff(pr_id: int, repo: str = "", timeout: float = 120.0) -> dict[str, Any]: + """Fetch the unified diff text for a pull request. + + Args: + pr_id: Pull request ID. + repo: Repo slug, "workspace/slug", or "" to auto-detect. + timeout: Per-call timeout in seconds (default 120). Bump for + very large PR diffs. 
+ """ + try: + client, workspace, repo_slug = _resolve_repo(repo) + diff = bb_ops.pr_diff(client, workspace, repo_slug, pr_id, timeout=timeout) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "diff": diff, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_comments_list(pr_id: int, repo: str = "", count: int = 100) -> dict[str, Any]: + """List comments on a pull request.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + comments = bb_ops.pr_comments_list(client, workspace, repo_slug, pr_id, count=count) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "comments": comments, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +@mcp.tool() +def pr_comment_add(pr_id: int, body: str, repo: str = "") -> dict[str, Any]: + """Add a top-level comment to a pull request.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + comment = bb_ops.pr_comment_add(client, workspace, repo_slug, pr_id, body) + return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "pr_id": pr_id, + "comment": comment, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict_with(e, pr_id=pr_id) + + +# ============================================================================= +# REPO / BRANCH / VARS / DOWNLOADS / COMMITS TOOLS +# ============================================================================= + + +@mcp.tool() +def repos_list( + workspace: str = "", + count: int = 100, + sort: str = "-updated_on", + query: str = "", +) -> dict[str, Any]: + """List repositories in a workspace. + + Args: + workspace: Workspace slug. Empty = use BB_WORKSPACE from config. + count: Maximum number of repos to return. + sort: Sort key (default: most-recently-updated first). + query: Optional BBQL filter (e.g. 'name ~ "widget"'). 
+ """ + try: + client = _get_client() + # Strip + fall back so " acme" / "acme " don't slip through and + # end up as `/repositories/%20acme` (404). Whitespace-only + # workspace falls back to the configured one. + ws = (workspace or "").strip() or client.config.workspace + repos = bb_ops.repos_list( + client, + workspace=ws, + count=count, + sort=sort, + query=_opt_str(query), + ) + return {"ok": True, "workspace": ws, "repos": repos} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def repo_show(repo: str = "") -> dict[str, Any]: + """Fetch repository metadata (language, size, clone URLs, etc.).""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + info = bb_ops.repo_show(client, workspace, repo_slug) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "info": info} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def branches_list( + repo: str = "", + count: int = 50, + sort: str = "-target.date", + query: str = "", +) -> dict[str, Any]: + """List branches in a repo, default sort is most-recently-updated first.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + branches = bb_ops.branches_list( + client, workspace, repo_slug, + count=count, + sort=sort, + query=_opt_str(query), + ) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "branches": branches} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def branch_show(name: str, repo: str = "") -> dict[str, Any]: + """Fetch a single branch by name. URL-encodes slashes in the name.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + branch = bb_ops.branch_show(client, workspace, repo_slug, name) + # Echo the stripped name so the response matches what Bitbucket + # actually resolved (bb_ops.branch_show strips before encoding). 
+ return { + "ok": True, + "workspace": workspace, + "repo": repo_slug, + "name": name.strip(), + "branch": branch, + } + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def vars_list(repo: str = "", count: int = 100) -> dict[str, Any]: + """List pipeline configuration variables. Secured values come back + as null from Bitbucket; the `secured` flag distinguishes that case.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + variables = bb_ops.vars_list(client, workspace, repo_slug, count=count) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "variables": variables} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def downloads_list(repo: str = "", count: int = 25) -> dict[str, Any]: + """List repository download artifacts.""" + try: + client, workspace, repo_slug = _resolve_repo(repo) + downloads = bb_ops.downloads_list(client, workspace, repo_slug, count=count) + return {"ok": True, "workspace": workspace, "repo": repo_slug, "downloads": downloads} + except _TOOL_EXPECTED_EXCEPTIONS as e: + return _error_dict(e) + + +@mcp.tool() +def commits_list(repo: str = "", branch: str = "", count: int = 10) -> dict[str, Any]: + """List recent commits. + + Args: + repo: Repo slug, "workspace/slug", or "" to auto-detect. + branch: Branch name. Empty = all branches via /commits. + count: Maximum number of commits to return. 
@mcp.tool()
def git_current_branch(path: str = "") -> dict[str, Any]:
    """Return the current git branch name. Detached HEAD returns "HEAD"."""
    try:
        # An empty `path` means "use the server's default repo path".
        workdir = path if path else _default_repo_path()
        current = git_ops.git_current_branch(path=workdir)
    except _TOOL_EXPECTED_EXCEPTIONS as exc:
        return _error_dict(exc)
    # Report the directory actually used, not the raw argument.
    return {"ok": True, "path": workdir, "branch": current}
@mcp.tool()
def git_recent_commits(path: str = "", count: int = 10, ref: str = "HEAD") -> dict[str, Any]:
    """List the most recent `count` commits reachable from `ref`."""
    try:
        # An empty `path` means "use the server's default repo path".
        workdir = path if path else _default_repo_path()
        # None, "" and whitespace-only refs all collapse to the HEAD
        # default, so a blank ref behaves exactly like an omitted one
        # instead of reaching git_ops as an empty string.
        effective_ref = ref.strip() if ref else ""
        if not effective_ref:
            effective_ref = "HEAD"
        history = git_ops.git_recent_commits(
            path=workdir,
            count=count,
            ref=effective_ref,
        )
    except _TOOL_EXPECTED_EXCEPTIONS as exc:
        return _error_dict(exc)
    # Echo the normalised ref so the caller sees what was actually queried.
    return {"ok": True, "path": workdir, "ref": effective_ref, "commits": history}
@mcp.tool()
def whoami() -> dict[str, Any]:
    """Report the resolved Bitbucket user, workspace, API base, the
    auto-detected git context for the current working directory, and a
    workspace-reachability probe that confirms the credential reaches
    the configured workspace.

    Does NOT echo the token.

    Three-phase: (1) config (fatal — flips ok=False on failure);
    (2) git context (best-effort — failures stored as structured
    sub-errors but don't flip ok=False, since the server is useful
    even outside a git repo); (3) workspace reachability via a single
    low-cost `GET /repositories/{workspace}?pagelen=1` with a 10 s
    timeout (best-effort — failures recorded as `auth` payload but
    don't flip ok=False, since config + git context are still useful
    with a stale token).

    The reachability probe targets the workspace endpoint (not /user)
    because Atlassian's workspace-scoped tokens — the now-recommended
    shape — reject /user with 401/403 while serving the workspace
    endpoint correctly, so a /user probe would false-negative valid
    tokens. Note the converse trade-off: this endpoint requires
    `repository:read` scope, so a workspace-scoped token granting only
    `pipelines:read` or `pullrequest:read` will surface as
    `auth.ok=False` even though pipeline / PR ops still work. No
    single endpoint covers every scope; treat `auth.ok=False` as a
    "this scope probably can't do repo listing" signal rather than as
    a global credential verdict.
    """
    report: dict[str, Any] = {"ok": True}

    # --- Phase 1: configuration (the only fatal phase) --------------------
    # The client stays None when configuration cannot be loaded; phase 3
    # uses that as its "nothing to probe with" signal.
    bb_client: bb_api.BBClient | None = None
    try:
        bb_client = _get_client()
        settings = bb_client.config
        report["user"] = settings.user
        report["workspace"] = settings.workspace
        report["api_base"] = settings.api_base
    except _TOOL_EXPECTED_EXCEPTIONS as exc:
        # Build the error payload first, then merge it at the top level
        # with ok=False as the starting value (a key of the same name in
        # the payload still takes precedence, as in a dict-literal merge).
        failure = _error_dict(exc)
        report["ok"] = False
        report.update(failure)

    # --- Phase 2: git context (best-effort) -------------------------------
    # The broad expected-exception tuple is used here so that a failure
    # inside _default_repo_path() (e.g. the cwd no longer existing) is
    # recorded rather than escaping. Each failure is stored as a full
    # structured error dict so callers get the same shape as elsewhere.
    try:
        repo_dir = _default_repo_path()
    except _TOOL_EXPECTED_EXCEPTIONS as exc:
        repo_dir = None
        report["cwd_error"] = _error_dict(exc)
    else:
        report["cwd"] = repo_dir

    if repo_dir is not None:
        # Branch and remote are looked up independently: one failing does
        # not prevent the other from being reported.
        try:
            report["git_branch"] = git_ops.git_current_branch(path=repo_dir)
        except _TOOL_EXPECTED_EXCEPTIONS as exc:
            report["git_branch_error"] = _error_dict(exc)
        try:
            report["git_workspace"], report["git_repo"] = git_ops.git_remote_repo(
                path=repo_dir
            )
        except _TOOL_EXPECTED_EXCEPTIONS as exc:
            report["git_remote_error"] = _error_dict(exc)

    # --- Phase 3: workspace reachability (best-effort) --------------------
    # One cheap GET against the configured workspace; skipped entirely when
    # phase 1 produced no client. The outcome lands under `auth` and never
    # changes the top-level `ok`.
    if bb_client is not None:
        try:
            encoded_workspace = urllib.parse.quote(bb_client.config.workspace, safe="")
            bb_client.get(
                f"/repositories/{encoded_workspace}",
                query={"pagelen": "1"},
                timeout=10.0,
            )
        except _TOOL_EXPECTED_EXCEPTIONS as exc:
            report["auth"] = {"ok": False, **_error_dict(exc)}
        else:
            report["auth"] = {"ok": True}

    return report
# Force the bootstrap guard on for the whole test run. This is a plain
# overwrite on purpose: a "set only if missing" approach would respect an
# inherited `BB_MCP_SKIP_BOOTSTRAP=0` from the developer's shell, silently
# disabling the guard and letting mcp_server exec into its bootstrap venv
# (which hangs the test process).
os.environ.update({"BB_MCP_SKIP_BOOTSTRAP": "1"})

# The repo root is the directory containing tests/ — two levels above this
# file. Put it at the front of sys.path (once) so the flat top-level modules
# are importable from the test modules.
_REPO_ROOT = Path(__file__).resolve().parents[1]
if not any(entry == str(_REPO_ROOT) for entry in sys.path):
    sys.path.insert(0, str(_REPO_ROOT))
def _write_config(tmp_path: Path, body: str) -> Path:
    """Write *body* to a file named ``config`` inside *tmp_path*.

    Returns the path of the file that was written so the caller can hand
    it straight to ``load_config(config_path=...)``.
    """
    config_file = tmp_path.joinpath("config")
    config_file.write_text(body)
    return config_file
def test_load_config_handles_quotes(tmp_path: Path) -> None:
    """Double- and single-quoted values load without their quotes, and a
    quoted value keeps its inner spaces."""
    config_lines = [
        'BB_USER="alice@example.com"',
        "BB_TOKEN='tok with spaces'",
        "BB_WORKSPACE=acme",
    ]
    cfg_path = _write_config(tmp_path, "\n".join(config_lines) + "\n")
    loaded = load_config(env={}, config_path=cfg_path)
    assert loaded.user == "alice@example.com"
    assert loaded.token == "tok with spaces"
def test_load_config_env_beats_dotenv(tmp_path: Path) -> None:
    """A BB_TOKEN in the process env wins when both the home config and
    the repo-local .env also define one."""
    file_sources = {
        "home.cfg": "BB_USER=alice@example.com\nBB_TOKEN=home-tok\nBB_WORKSPACE=acme\n",
        ".env": "BB_TOKEN=dotenv-tok\n",
    }
    for filename, text in file_sources.items():
        (tmp_path / filename).write_text(text)

    resolved = load_config(
        env={"BB_TOKEN": "env-tok"},
        config_path=tmp_path / "home.cfg",
        dotenv_path=tmp_path / ".env",
    )
    assert resolved.token == "env-tok"
def test_load_config_missing_keys_lists_all(tmp_path: Path) -> None:
    """With no env and no config file, the error names every required key
    rather than stopping at the first missing one."""
    absent_config = tmp_path / "no-such-file"
    with pytest.raises(BBConfigError) as caught:
        load_config(env={}, config_path=absent_config)
    message = str(caught.value)
    for required_key in ("BB_USER", "BB_TOKEN", "BB_WORKSPACE"):
        assert required_key in message
def _fake_runner(*, returncode: int = 0, stdout: str = "", stderr: str = "") -> Any:
    """Build a stand-in for the `subprocess` module, for code that takes a
    `runner=` parameter (as `detect_repo` does in these tests).

    Only `.run` is provided. It ignores the command and returns a canned
    result whose `returncode` / `stdout` / `stderr` are the values given
    here; `args` echoes the first positional argument, or the `args`
    keyword when called without positionals.
    """

    def _run(*args: Any, **kwargs: Any) -> Any:
        command = args[0] if args else kwargs.get("args")
        return SimpleNamespace(
            args=command,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
        )

    # A class (not an instance) with a static `run`, so callers use it
    # exactly like the module: `runner.run([...], ...)`.
    return type("_Fake", (), {"run": staticmethod(_run)})
def test_detect_repo_no_git_binary() -> None:
    """A runner that raises FileNotFoundError (git not installed) is
    reported as BBConfigError with a "git executable not found" message."""

    def _raise_missing_git(*_args: Any, **_kwargs: Any) -> Any:
        raise FileNotFoundError("git not on PATH")

    class _NoGit:
        run = staticmethod(_raise_missing_git)

    with pytest.raises(BBConfigError, match="git executable not found"):
        detect_repo(runner=_NoGit)
class _CaptureOpener:
    """Recording stand-in for a urllib opener.

    Constructed with a queue of canned JSON-able responses. Every
    ``open()`` call appends one entry to ``calls`` (URL, method, headers,
    decoded JSON body, timeout) and then consumes the next queued
    response, so a test can assert on exactly what was sent. A request
    that arrives after the queue is empty fails the test loudly.
    """

    def __init__(self, responses: list[dict[str, Any]]):
        self.calls: list[dict[str, Any]] = []
        # Copy so popping responses never mutates the caller's list.
        self.responses = list(responses)

    def open(self, req: urllib.request.Request, timeout: float = 30.0) -> Any:
        payload = req.data
        record = {
            "url": req.full_url,
            "method": req.get_method(),
            # Request stores header names via str.capitalize()
            # ("Content-type"); title-case them so assertions can use the
            # conventional "Content-Type" spelling.
            "headers": {name.title(): value for name, value in req.header_items()},
            "body": json.loads(payload.decode("utf-8")) if payload else None,
            "timeout": timeout,
        }
        # Record before the queue check so even an unexpected request is
        # visible in `calls`.
        self.calls.append(record)
        if not self.responses:
            raise AssertionError(
                f"opener received an unexpected request: {req.get_method()} {req.full_url}"
            )
        canned = self.responses.pop(0)
        # A queued None stands for an empty response body.
        if canned is None:
            return _FakeResponse(b"")
        return _FakeResponse(json.dumps(canned).encode("utf-8"))
def test_post_sends_json_body() -> None:
    """POST with `json_body` sends that exact JSON document with a JSON
    Content-Type header."""
    pr_endpoint = "/repositories/acme/widget-service/pullrequests"
    opener = _CaptureOpener([{"id": 42}])
    _client(opener).post(
        pr_endpoint,
        json_body={
            "title": "Add widget",
            "source": {"branch": {"name": "feat/widget"}},
            "destination": {"branch": {"name": "main"}},
        },
    )
    recorded = opener.calls[0]
    assert recorded["method"] == "POST"
    assert recorded["headers"]["Content-Type"] == "application/json"
    # Compare the whole body against an independent literal: a check for
    # mere presence would pass even if the client dropped or renamed fields.
    expected_body = {
        "title": "Add widget",
        "source": {"branch": {"name": "feat/widget"}},
        "destination": {"branch": {"name": "main"}},
    }
    assert recorded["body"] == expected_body
def test_http_error_surfaces_as_bbapierror() -> None:
    """An HTTPError from the opener is re-raised as BBApiError carrying the
    HTTP status and the response body text."""
    not_found_payload = b'{"error": {"message": "Repository not found"}}'

    class _ErrorOpener:
        def open(self, req: urllib.request.Request, timeout: float = 30.0) -> Any:
            raise urllib.error.HTTPError(
                url=req.full_url,
                code=404,
                msg="Not Found",
                hdrs=None,  # type: ignore[arg-type]
                fp=io.BytesIO(not_found_payload),
            )

    # Build a normal client, then swap in the always-failing opener.
    client = _client(_CaptureOpener([]))
    client._opener = _ErrorOpener()  # type: ignore[assignment]
    with pytest.raises(BBApiError) as caught:
        client.get("/repositories/acme/missing")
    api_error = caught.value
    assert api_error.status == 404
    assert "Repository not found" in api_error.body
def test_request_per_call_timeout_overrides_default() -> None:
    """Each verb (get / post / put / delete) accepts a per-call `timeout`
    that replaces the client-level default for that one request."""
    queued = [{"ok": True}, {"ok": True}, {"ok": True}, None]
    opener = _CaptureOpener(queued)
    config = BBConfig(
        user="alice@example.com",
        token="tok",
        workspace="acme",
        api_base=DEFAULT_API_BASE,
    )
    # The client default is 5.0; none of the recorded timeouts may equal it.
    client = BBClient(config, opener=opener, timeout=5.0)
    client.get("/a", timeout=120.0)
    client.post("/b", json_body={"x": 1}, timeout=60.0)
    client.put("/c", json_body={"y": 2}, timeout=30.0)
    client.delete("/d", timeout=15.0)
    observed = [entry["timeout"] for entry in opener.calls]
    assert observed == [120.0, 60.0, 30.0, 15.0]
def test_default_opener_refuses_redirects() -> None:
    """The default BBClient opener installs _NoRedirectHandler so a 3xx
    response surfaces as an HTTPError rather than causing urllib to
    resubmit the Authorization header to the redirect's Location URL.

    This is what the bash side does by default (curl without -L). Endpoints
    that legitimately redirect (e.g. log streaming to S3) need a separate
    code path that strips Authorization on cross-host hops; that work lands
    when bb_ops adds pipeline_logs.

    Both halves are pinned: the custom handler must be present AND the
    stock HTTPRedirectHandler must be absent.
    """
    cfg = BBConfig(
        user="alice@example.com", token="tok", workspace="acme", api_base=DEFAULT_API_BASE
    )
    client = BBClient(cfg)  # default opener
    handler_classes = [type(h).__name__ for h in client._opener.handlers]
    # The custom handler is installed...
    assert "_NoRedirectHandler" in handler_classes
    # ...and it has replaced, not merely joined, the stock handler.
    # Presence alone is not enough: if the stock HTTPRedirectHandler were
    # still in the chain, a None result from our handler would let urllib
    # fall through to it and the redirect would be followed after all.
    # (Exact class-name match, so our subclass does not trip this check.)
    assert "HTTPRedirectHandler" not in handler_classes
def test_paginate_walks_pages() -> None:
    """paginate() yields every item across pages and requests each page's
    `next` URL in turn, stopping at the page that has no `next`."""
    pr_path = "/repositories/acme/widget-service/pullrequests"
    base = DEFAULT_API_BASE + pr_path
    pages = [
        {"values": [{"id": 1}, {"id": 2}], "next": base + "?page=2"},
        {"values": [{"id": 3}], "next": base + "?page=3"},
        {"values": [{"id": 4}]},  # final page: no `next`
    ]
    opener = _CaptureOpener(pages)
    collected = list(_client(opener).paginate(pr_path))
    assert [item["id"] for item in collected] == [1, 2, 3, 4]
    # Check the URL of every request, not just how many were made: a
    # regression that refetched the first URL three times would still
    # drain the three queued responses and yield the same items.
    requested_urls = [entry["url"] for entry in opener.calls]
    assert requested_urls == [base, base + "?page=2", base + "?page=3"]
+ assert len(opener.calls) == 2 + + +def test_paginate_iteration_cap_raises() -> None: + base = DEFAULT_API_BASE + "/repos" + # Each response advances the cursor by one, so stuck-cursor detection + # never trips. We bound the test at a low max_iterations to keep it fast. + responses = [ + {"values": [{"i": i}], "next": f"{base}?page={i + 2}"} for i in range(10) + ] + opener = _CaptureOpener(responses) + client = _client(opener) + with pytest.raises(BBApiError, match="exceeded"): + list(client.paginate("/repos", max_iterations=5)) + + +def test_paginate_refuses_cross_host_next() -> None: + """If `next` points at a different host than api_base, we refuse to + follow it. Defends against an upstream-bug or man-in-the-middle scenario + where the cursor URL has been mangled.""" + opener = _CaptureOpener( + [ + { + "values": [{"id": 1}], + "next": "https://evil.example.com/2.0/repos?page=2", + }, + ] + ) + client = _client(opener) + with pytest.raises(BBApiError, match="host mismatch"): + list(client.paginate("/repos")) + + +def test_paginate_refuses_prefix_trick_next() -> None: + """A `next` URL that string-prefixes api_base but continues into a + different host slips past a naive `startswith()` check. Defends by + requiring the separator after api_base to be `/` or `?`.""" + sneaky = DEFAULT_API_BASE + "evil.example.com/repos?page=2" + opener = _CaptureOpener( + [ + {"values": [{"id": 1}], "next": sneaky}, + ] + ) + client = _client(opener) + with pytest.raises(BBApiError, match="host mismatch"): + list(client.paginate("/repos")) + + +def test_paginate_missing_values_key_raises() -> None: + """A malformed page (or proxy-corrupted response) without `values` should + be loud, not silently treated as empty. 
The previous .get('values', []) + would have advanced through `next` with a silent hole in the result set.""" + opener = _CaptureOpener( + [ + {"next": DEFAULT_API_BASE + "/repos?page=2"}, # no `values` + ] + ) + client = _client(opener) + with pytest.raises(BBApiError, match="missing 'values'"): + list(client.paginate("/repos")) + + +def test_paginate_non_string_next_raises() -> None: + """`next: 123` would otherwise crash on `.startswith()` with + AttributeError, bypassing the BBApiError contract.""" + opener = _CaptureOpener( + [ + {"values": [{"id": 1}], "next": 12345}, + ] + ) + client = _client(opener) + with pytest.raises(BBApiError, match="must be a string"): + list(client.paginate("/repos")) + + +def test_paginate_yields_nothing_on_empty_first_page() -> None: + opener = _CaptureOpener([{"values": []}]) + client = _client(opener) + assert list(client.paginate("/repos")) == [] diff --git a/tests/test_bb_ops_pipelines.py b/tests/test_bb_ops_pipelines.py new file mode 100644 index 0000000..b64c6cd --- /dev/null +++ b/tests/test_bb_ops_pipelines.py @@ -0,0 +1,644 @@ +""" +Tests for bb_ops pipeline operations. + +Discipline: every test asserts the request URL, method, and body shape +the function emits — not just the response value. A test that only checks +the return value would pass against a function that hits the wrong +endpoint or sends a malformed payload but happens to get a 200 back from +the mock. That's the "mock returns success regardless of request body" +anti-pattern called out in the testing methodology. + +All fixtures are fictional: workspace `acme`, repo `widget-service`, +users `alice` / `bob`. 
+""" + +from __future__ import annotations + +import io +import json +import urllib.error +import urllib.request +from typing import Any + +import pytest + +import bb_api +import bb_ops +from bb_api import BBApiError, BBClient, BBConfig, DEFAULT_API_BASE + + +# --------------------------------------------------------------------------- +# Test scaffolding (same shape as test_bb_api.py's _CaptureOpener) +# --------------------------------------------------------------------------- + + +class _CaptureOpener: + """Records each request and returns canned JSON. Reusing the same + pattern as test_bb_api to keep cognitive overhead low across the + suite.""" + + def __init__(self, responses: list[Any]): + self.responses = list(responses) + self.calls: list[dict[str, Any]] = [] + + def open(self, req: urllib.request.Request, timeout: float = 30.0) -> Any: + body = req.data + normalised_headers = {k.title(): v for k, v in req.header_items()} + self.calls.append( + { + "url": req.full_url, + "method": req.get_method(), + "headers": normalised_headers, + "body": json.loads(body.decode("utf-8")) if body else None, + "timeout": timeout, + } + ) + if not self.responses: + raise AssertionError( + f"opener received an unexpected request: " + f"{req.get_method()} {req.full_url}" + ) + resp = self.responses.pop(0) + # Four response shapes: + # dict | list -> JSON-encoded body + # None -> empty body (204 No Content) + # bytes -> raw body (for fetch_redirected_text tests) + # Exception -> raised on open (for redirect / error tests) + if isinstance(resp, BaseException): + raise resp + if resp is None: + body_bytes: bytes = b"" + elif isinstance(resp, (bytes, bytearray)): + body_bytes = bytes(resp) + else: + body_bytes = json.dumps(resp).encode("utf-8") + return _FakeResponse(body_bytes) + + +class _FakeResponse: + def __init__(self, body: bytes): + self._body = body + + def read(self) -> bytes: + return self._body + + def __enter__(self) -> "_FakeResponse": + return self + + def
__exit__(self, *_: Any) -> None: + pass + + +def _client(opener: _CaptureOpener) -> BBClient: + cfg = BBConfig( + user="alice@example.com", + token="tok-xyz", + workspace="acme", + api_base=DEFAULT_API_BASE, + ) + return BBClient(cfg, opener=opener) + + +def _pipelines_url() -> str: + return DEFAULT_API_BASE + "/repositories/acme/widget-service/pipelines/" + + +def _make_pipeline(build_number: int, uuid: str = "p-uuid") -> dict[str, Any]: + """Realistic-shape pipeline record. Matches what Bitbucket returns + enough to exercise the parsing in bb_ops without being a fixture + burden.""" + return { + "build_number": build_number, + "uuid": f"{{{uuid}}}", + "state": {"name": "COMPLETED", "result": {"name": "SUCCESSFUL"}}, + "target": {"ref_name": "main"}, + "created_on": "2026-05-26T12:00:00Z", + "duration_in_seconds": 42, + } + + +def _make_step(name: str, uuid: str) -> dict[str, Any]: + return { + "uuid": f"{{{uuid}}}", + "name": name, + "state": {"name": "COMPLETED", "result": {"name": "SUCCESSFUL"}}, + "duration_in_seconds": 30, + } + + +# =========================================================================== +# Internal helpers +# =========================================================================== + + +class TestWrapUuid: + def test_strips_braces_and_url_encodes(self) -> None: + # Bitbucket UUIDs come in `{uuid}` shape on some endpoints and bare + # on others; _wrap_uuid normalises to the URL-encoded brace form. 
+ assert bb_ops._wrap_uuid("abc-123") == "%7Babc-123%7D" + assert bb_ops._wrap_uuid("{abc-123}") == "%7Babc-123%7D" + + def test_handles_whitespace(self) -> None: + assert bb_ops._wrap_uuid(" abc-123 ") == "%7Babc-123%7D" + + +class TestStripUuidBraces: + def test_bare_uuid(self) -> None: + assert bb_ops._strip_uuid_braces("abc-123") == "abc-123" + + def test_braced_uuid(self) -> None: + assert bb_ops._strip_uuid_braces("{abc-123}") == "abc-123" + + def test_empty_raises(self) -> None: + # The bash equivalent silently produces "" and then misroutes the + # next URL — we make it loud instead. + with pytest.raises(BBApiError, match="missing uuid"): + bb_ops._strip_uuid_braces("") + with pytest.raises(BBApiError, match="missing uuid"): + bb_ops._strip_uuid_braces(None) + + +# =========================================================================== +# pipelines_list +# =========================================================================== + + +class TestPipelinesList: + def test_single_page_default_count(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [_make_pipeline(n) for n in range(10, 0, -1)], + # No `next` -> single page. + }, + ] + ) + result = bb_ops.pipelines_list(_client(opener), "acme", "widget-service") + assert len(result) == 10 + assert [p["build_number"] for p in result] == list(range(10, 0, -1)) + # Assert the EXACT request shape. + call = opener.calls[0] + assert call["method"] == "GET" + assert call["url"].startswith(_pipelines_url() + "?") + assert "sort=-created_on" in call["url"] + # Default count=10 → pagelen=10 (matches bash exactly). + assert "pagelen=10" in call["url"] + + def test_count_capped_at_bitbucket_max(self) -> None: + # Bitbucket's pagelen cap is 100. Caller wants 250 → pagelen=100 + # per page, two pages walked. 
+ opener = _CaptureOpener( + [ + { + "values": [_make_pipeline(n) for n in range(250, 150, -1)], + "next": _pipelines_url() + "?page=2", + }, + { + "values": [_make_pipeline(n) for n in range(150, 0, -1)], + }, + ] + ) + result = bb_ops.pipelines_list( + _client(opener), "acme", "widget-service", count=250 + ) + assert len(result) == 250 + # First request used pagelen=100 (the cap), NOT 250. + assert "pagelen=100" in opener.calls[0]["url"] + + def test_stops_at_count_mid_page(self) -> None: + # Caller asks for 7 — we honour exactly 7 even though the first + # page returned 10. + opener = _CaptureOpener( + [{"values": [_make_pipeline(n) for n in range(10, 0, -1)]}] + ) + result = bb_ops.pipelines_list( + _client(opener), "acme", "widget-service", count=7 + ) + assert len(result) == 7 + + def test_branch_filter(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.pipelines_list( + _client(opener), "acme", "widget-service", branch="feat/widget" + ) + url = opener.calls[0]["url"] + # Bitbucket's contract for filtering by branch is `target.ref_name=...`. 
+ assert "target.ref_name=feat%2Fwidget" in url + + def test_rejects_non_positive_count(self) -> None: + opener = _CaptureOpener([]) + for bad in (0, -1, "ten"): + with pytest.raises(ValueError, match="count"): + bb_ops.pipelines_list( + _client(opener), "acme", "widget-service", count=bad # type: ignore[arg-type] + ) + assert opener.calls == [] # no request emitted for any bad input + + +# =========================================================================== +# _resolve_pipeline_uuid (covered indirectly via pipeline_show, but worth +# direct tests for the not-found and pagination-walk paths) +# =========================================================================== + + +class TestResolvePipelineUuid: + def test_finds_on_first_page(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [ + _make_pipeline(42, uuid="target-uuid"), + _make_pipeline(41), + ] + } + ] + ) + uuid = bb_ops._resolve_pipeline_uuid( + _client(opener), "acme", "widget-service", 42 + ) + assert uuid == "target-uuid" + + def test_walks_pages_to_find(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [_make_pipeline(n) for n in range(100, 50, -1)], + "next": _pipelines_url() + "?page=2", + }, + { + "values": [_make_pipeline(n, uuid=f"u-{n}") for n in range(50, 0, -1)], + }, + ] + ) + uuid = bb_ops._resolve_pipeline_uuid( + _client(opener), "acme", "widget-service", 5 + ) + assert uuid == "u-5" + assert len(opener.calls) == 2 + + def test_not_found_raises_bbopnotfound(self) -> None: + opener = _CaptureOpener([{"values": [_make_pipeline(n) for n in range(10, 0, -1)]}]) + with pytest.raises(bb_ops.BBOpNotFound, match="#999"): + bb_ops._resolve_pipeline_uuid( + _client(opener), "acme", "widget-service", 999 + ) + + def test_rejects_invalid_build_number(self) -> None: + opener = _CaptureOpener([]) + for bad in (0, -1, "42", None): + with pytest.raises(ValueError, match="build_number"): + bb_ops._resolve_pipeline_uuid( + _client(opener), "acme", "widget-service", bad # type: 
ignore[arg-type] + ) + assert opener.calls == [] + + def test_scan_limit_caps_search(self) -> None: + # The walker should stop after scan_limit items even if more pages + # are available. Defend against an unbounded search. + opener = _CaptureOpener( + [ + { + "values": [_make_pipeline(n) for n in range(100, 0, -1)], + # Server says there's more — we should NOT request page 2 + # if scan_limit < 100 has already been hit. + "next": _pipelines_url() + "?page=2", + }, + ] + ) + with pytest.raises(bb_ops.BBOpNotFound): + bb_ops._resolve_pipeline_uuid( + _client(opener), "acme", "widget-service", 999, scan_limit=50 + ) + # Only the first page should have been fetched (scan_limit=50 hit + # before exhausting page 1). + assert len(opener.calls) == 1 + + +# =========================================================================== +# pipeline_show +# =========================================================================== + + +class TestPipelineShow: + def test_fetches_by_uuid_after_lookup(self) -> None: + # 1) list-walk to find build 42's uuid + # 2) GET the pipeline by uuid + target_uuid = "abc-123-def" + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(42, uuid=target_uuid)]}, + _make_pipeline(42, uuid=target_uuid), # the show response + ] + ) + result = bb_ops.pipeline_show(_client(opener), "acme", "widget-service", 42) + assert result["build_number"] == 42 + + # Two requests: list, then individual show. + assert len(opener.calls) == 2 + # Show URL uses %7B...%7D bracketed UUID — the bash contract. 
+ assert ( + opener.calls[1]["url"] + == _pipelines_url() + "%7Babc-123-def%7D" + ) + assert opener.calls[1]["method"] == "GET" + + +# =========================================================================== +# pipeline_steps +# =========================================================================== + + +class TestPipelineSteps: + def test_lists_steps_for_build(self) -> None: + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(7, uuid="pipe-uuid")]}, # uuid lookup + {"values": [_make_step("build", "s1"), _make_step("deploy", "s2")]}, + ] + ) + result = bb_ops.pipeline_steps(_client(opener), "acme", "widget-service", 7) + assert [s["name"] for s in result] == ["build", "deploy"] + # Step list URL hits /pipelines/{uuid}/steps/ + assert "/pipelines/%7Bpipe-uuid%7D/steps/" in opener.calls[1]["url"] + + +# =========================================================================== +# pipeline_trigger +# =========================================================================== + + +class TestPipelineTrigger: + def test_default_pipeline_payload_shape(self) -> None: + opener = _CaptureOpener([_make_pipeline(99)]) + bb_ops.pipeline_trigger( + _client(opener), "acme", "widget-service", branch="feat/widget" + ) + call = opener.calls[0] + assert call["method"] == "POST" + assert call["url"] == _pipelines_url() + # Default pipeline: no `selector`, no `variables` key. 
+ assert call["body"] == { + "target": {"ref_name": "feat/widget", "ref_type": "branch"} + } + + def test_custom_pipeline_payload_shape(self) -> None: + opener = _CaptureOpener([_make_pipeline(100)]) + bb_ops.pipeline_trigger( + _client(opener), + "acme", + "widget-service", + branch="main", + pattern="deploy-prod", + ) + assert opener.calls[0]["body"] == { + "target": { + "ref_name": "main", + "ref_type": "branch", + "selector": {"type": "custom", "pattern": "deploy-prod"}, + } + } + + def test_variables_dict_payload_shape(self) -> None: + opener = _CaptureOpener([_make_pipeline(101)]) + bb_ops.pipeline_trigger( + _client(opener), + "acme", + "widget-service", + branch="main", + variables={"REGION": "us-west-2", "DEPLOY_TAG": "v2.3"}, + ) + body = opener.calls[0]["body"] + # Bitbucket's contract: list of {"key", "value"} objects. + assert sorted(body["variables"], key=lambda v: v["key"]) == [ + {"key": "DEPLOY_TAG", "value": "v2.3"}, + {"key": "REGION", "value": "us-west-2"}, + ] + + def test_variables_iterable_of_pairs(self) -> None: + opener = _CaptureOpener([_make_pipeline(102)]) + bb_ops.pipeline_trigger( + _client(opener), + "acme", + "widget-service", + branch="main", + variables=[("A", "1"), ("B", "2")], + ) + assert opener.calls[0]["body"]["variables"] == [ + {"key": "A", "value": "1"}, + {"key": "B", "value": "2"}, + ] + + def test_empty_variables_omitted_from_payload(self) -> None: + opener = _CaptureOpener([_make_pipeline(103)]) + bb_ops.pipeline_trigger( + _client(opener), "acme", "widget-service", branch="main", variables={} + ) + # Empty variables → no `variables` key in the request body, NOT an + # empty list. Matches Bitbucket's "absence is default" contract. + assert "variables" not in opener.calls[0]["body"] + + def test_rejects_non_string_variable_value(self) -> None: + # Bitbucket only accepts string values; defend at the boundary. 
+ opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="variable value"): + bb_ops.pipeline_trigger( + _client(opener), + "acme", + "widget-service", + branch="main", + variables={"COUNT": 42}, # type: ignore[dict-item] + ) + assert opener.calls == [] + + def test_rejects_empty_pattern(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="pattern"): + bb_ops.pipeline_trigger( + _client(opener), "acme", "widget-service", branch="main", pattern="" + ) + assert opener.calls == [] + + def test_rejects_empty_branch(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="branch"): + bb_ops.pipeline_trigger( + _client(opener), "acme", "widget-service", branch="" + ) + assert opener.calls == [] + + +# =========================================================================== +# pipeline_stop +# =========================================================================== + + +class TestPipelineStop: + def test_posts_to_stop_endpoint(self) -> None: + # Two responses: the uuid-lookup list, then the stop POST (returns + # None to simulate Bitbucket's 204 No Content). + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(7, uuid="stoppable")]}, + None, + ] + ) + result = bb_ops.pipeline_stop(_client(opener), "acme", "widget-service", 7) + assert result is None # 204 + # The stop call hits /pipelines/{uuid}/stopPipeline and is a POST + # with no body. + assert opener.calls[1]["method"] == "POST" + assert opener.calls[1]["url"].endswith( + "/pipelines/%7Bstoppable%7D/stopPipeline" + ) + assert opener.calls[1]["body"] is None + + +# =========================================================================== +# pipeline_logs (the redirect-with-auth-strip path) +# =========================================================================== + + +class TestPipelineLogs: + def test_inline_log_body_returned(self) -> None: + # Three requests: + # 1. pipelines list (find build_number's uuid) + # 2. 
steps list (find step_index's uuid) + # 3. log fetch — server returns the log body inline (200), no redirect + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(42, uuid="pipe-uuid")]}, + {"values": [_make_step("build", "step-uuid")]}, + b"+ echo hello\nhello\n", # raw log body + ] + ) + result = bb_ops.pipeline_logs( + _client(opener), "acme", "widget-service", 42, 0 + ) + assert result == "+ echo hello\nhello\n" + log_call = opener.calls[2] + assert log_call["method"] == "GET" + assert log_call["url"].endswith( + "/pipelines/%7Bpipe-uuid%7D/steps/%7Bstep-uuid%7D/log" + ) + # The log call carries Authorization (no redirect happened). + assert "Authorization" in log_call["headers"] + + def test_follows_s3_redirect_and_strips_auth(self) -> None: + # Models the real Bitbucket behaviour: log endpoint returns 307 to + # a signed S3 URL. We follow, and the second call MUST NOT carry + # Authorization (S3 would reject it, and we don't want our + # Bitbucket Basic credential going anywhere else). + s3_url = ( + "https://bbci-pipeline-logs.s3.amazonaws.com/" + "acme/widget-service/42/build.log?X-Amz-Signature=abc" + ) + redirect_response = urllib.error.HTTPError( + url=DEFAULT_API_BASE + + "/repositories/acme/widget-service/pipelines/%7Bp%7D/steps/%7Bs%7D/log", + code=307, + msg="Temporary Redirect", + hdrs={"Location": s3_url}, # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(42, uuid="p")]}, # build_number lookup + {"values": [_make_step("build", "s")]}, # step index lookup + redirect_response, # log fetch → 307 + b"log content from s3\n", # follow-up to S3 + ] + ) + result = bb_ops.pipeline_logs( + _client(opener), "acme", "widget-service", 42, 0 + ) + assert result == "log content from s3\n" + # Four total requests; the 4th was the S3 follow-up. + assert len(opener.calls) == 4 + s3_call = opener.calls[3] + assert s3_call["url"] == s3_url + # CRITICAL: Authorization must NOT have been sent to S3. 
+ assert "Authorization" not in s3_call["headers"], ( + "Bitbucket Basic auth was sent to the S3 host — this is the " + "credential-leak the cross-host strip is meant to prevent." + ) + + def test_too_many_redirects_raises(self) -> None: + def _redirect_to(target: str) -> urllib.error.HTTPError: + return urllib.error.HTTPError( + url="https://x", + code=307, + msg="Temporary Redirect", + hdrs={"Location": target}, # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + + # Two responses for the uuid lookups + a chain of redirects that + # exceeds the default max_redirects (5). + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(1, uuid="p")]}, + {"values": [_make_step("build", "s")]}, + _redirect_to("https://api.bitbucket.org/2.0/hop/1"), + _redirect_to("https://api.bitbucket.org/2.0/hop/2"), + _redirect_to("https://api.bitbucket.org/2.0/hop/3"), + _redirect_to("https://api.bitbucket.org/2.0/hop/4"), + _redirect_to("https://api.bitbucket.org/2.0/hop/5"), + _redirect_to("https://api.bitbucket.org/2.0/hop/6"), # >5 hops + ] + ) + with pytest.raises(BBApiError, match="redirect chain exceeded"): + bb_ops.pipeline_logs(_client(opener), "acme", "widget-service", 1, 0) + + def test_redirect_without_location_raises(self) -> None: + broken_redirect = urllib.error.HTTPError( + url="https://x", + code=302, + msg="Found", + hdrs={}, # type: ignore[arg-type] + fp=io.BytesIO(b""), + ) + opener = _CaptureOpener( + [ + {"values": [_make_pipeline(1, uuid="p")]}, + {"values": [_make_step("build", "s")]}, + broken_redirect, + ] + ) + with pytest.raises(BBApiError, match="missing Location"): + bb_ops.pipeline_logs(_client(opener), "acme", "widget-service", 1, 0) + + +# =========================================================================== +# _resolve_step_uuid +# =========================================================================== + + +class TestResolveStepUuid: + def test_valid_index(self) -> None: + opener = _CaptureOpener( + [{"values": [_make_step("build",
"s1"), _make_step("test", "s2")]}] + ) + uuid = bb_ops._resolve_step_uuid( + _client(opener), "acme", "widget-service", "pipe-uuid", 1 + ) + # Returns just the uuid — callers that want the name fetch + # steps separately via pipeline_steps(). + assert uuid == "s2" + + def test_index_out_of_range_raises(self) -> None: + opener = _CaptureOpener( + [{"values": [_make_step("build", "s1")]}] + ) + with pytest.raises(bb_ops.BBOpNotFound, match="out of range"): + bb_ops._resolve_step_uuid( + _client(opener), "acme", "widget-service", "pipe-uuid", 5 + ) + + def test_rejects_negative_index(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="step_index"): + bb_ops._resolve_step_uuid( + _client(opener), "acme", "widget-service", "pipe-uuid", -1 + ) + assert opener.calls == [] diff --git a/tests/test_bb_ops_prs.py b/tests/test_bb_ops_prs.py new file mode 100644 index 0000000..1908c5c --- /dev/null +++ b/tests/test_bb_ops_prs.py @@ -0,0 +1,797 @@ +""" +Tests for bb_ops pull-request operations. + +Discipline: every HTTP-touching test asserts URL, method, AND body shape. +All fixtures are fictional (acme / widget-service / alice / bob). +""" + +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from typing import Any + +import pytest + +import bb_ops +from bb_api import BBClient, BBConfig, DEFAULT_API_BASE + + +# --------------------------------------------------------------------------- +# Test scaffolding +# --------------------------------------------------------------------------- + + +class _CaptureOpener: + """Records each request and returns canned JSON. 
Same shape as the + helper in test_bb_ops_pipelines, intentionally duplicated rather than + extracted to a shared fixture module: each test file should be + readable end-to-end without jumping to another file.""" + + def __init__(self, responses: list[Any]): + self.responses = list(responses) + self.calls: list[dict[str, Any]] = [] + + def open(self, req: urllib.request.Request, timeout: float = 30.0) -> Any: + body = req.data + normalised_headers = {k.title(): v for k, v in req.header_items()} + self.calls.append( + { + "url": req.full_url, + "method": req.get_method(), + "headers": normalised_headers, + "body": json.loads(body.decode("utf-8")) if body else None, + "timeout": timeout, + } + ) + if not self.responses: + raise AssertionError( + f"opener received an unexpected request: " + f"{req.get_method()} {req.full_url}" + ) + resp = self.responses.pop(0) + if isinstance(resp, BaseException): + raise resp + if resp is None: + body_bytes: bytes = b"" + elif isinstance(resp, (bytes, bytearray)): + body_bytes = bytes(resp) + else: + body_bytes = json.dumps(resp).encode("utf-8") + return _FakeResponse(body_bytes) + + +class _FakeResponse: + def __init__(self, body: bytes): + self._body = body + + def read(self) -> bytes: + return self._body + + def __enter__(self) -> "_FakeResponse": + return self + + def __exit__(self, *_: Any) -> None: + pass + + +def _client(opener: _CaptureOpener) -> BBClient: + cfg = BBConfig( + user="alice@example.com", + token="tok-xyz", + workspace="acme", + api_base=DEFAULT_API_BASE, + ) + return BBClient(cfg, opener=opener) + + +def _prs_url() -> str: + return DEFAULT_API_BASE + "/repositories/acme/widget-service/pullrequests" + + +def _make_pr(id_: int, state: str = "OPEN") -> dict[str, Any]: + return { + "id": id_, + "title": f"PR {id_}", + "state": state, + "author": {"display_name": "Alice"}, + "source": {"branch": {"name": "feat/widget"}}, + "destination": {"branch": {"name": "main"}}, + "created_on": "2026-05-26T12:00:00Z", + 
"updated_on": "2026-05-26T13:00:00Z", + "links": {"html": {"href": f"https://bitbucket.org/acme/widget-service/pull-requests/{id_}"}}, + } + + +# =========================================================================== +# prs_list +# =========================================================================== + + +class TestPrsList: + def test_default_state_open_count_25(self) -> None: + opener = _CaptureOpener([{"values": [_make_pr(i) for i in range(1, 26)]}]) + result = bb_ops.prs_list(_client(opener), "acme", "widget-service") + assert len(result) == 25 + url = opener.calls[0]["url"] + assert url.startswith(_prs_url() + "?") + assert "state=OPEN" in url + assert "pagelen=25" in url + + def test_state_filter(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.prs_list(_client(opener), "acme", "widget-service", state="MERGED") + assert "state=MERGED" in opener.calls[0]["url"] + + def test_count_walks_pages(self) -> None: + # Caller wants 150 PRs; Bitbucket caps pagelen at 100. Walk two pages. + opener = _CaptureOpener( + [ + { + "values": [_make_pr(i) for i in range(1, 101)], + "next": _prs_url() + "?page=2", + }, + {"values": [_make_pr(i) for i in range(101, 201)]}, + ] + ) + result = bb_ops.prs_list( + _client(opener), "acme", "widget-service", count=150 + ) + assert len(result) == 150 + assert "pagelen=100" in opener.calls[0]["url"] + + def test_rejects_non_positive_count(self) -> None: + opener = _CaptureOpener([]) + # True/False included: bool is a subclass of int but its URL + # stringification is "True"/"False" — symmetric with the bool + # rejection in TestPrActivity / TestPrCommentsList / pr_id checks. 
+ for bad in (0, -1, True, False, "ten"): + with pytest.raises(ValueError, match="count"): + bb_ops.prs_list( + _client(opener), + "acme", + "widget-service", + count=bad, # type: ignore[arg-type] + ) + assert opener.calls == [] # no request emitted for bad input + + def test_rejects_empty_state(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="state"): + bb_ops.prs_list(_client(opener), "acme", "widget-service", state="") + assert opener.calls == [] + + @pytest.mark.parametrize("bad_state", ["OPENED", "open", "OPEN,MERGED", "INVALID"]) + def test_rejects_unknown_state(self, bad_state: str) -> None: + """A typo like OPENED, a case bug like 'open', or the + comma-separated compound form (which Bitbucket does NOT accept + on the simple ?state= filter) all need to fail at the boundary + — otherwise the API call burns a quota slot returning empty + results or a 400. _KNOWN_PR_STATES is the symmetric guard to + pr_merge's strategy validation.""" + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="state must be one of"): + bb_ops.prs_list( + _client(opener), "acme", "widget-service", state=bad_state + ) + assert opener.calls == [] + + +# =========================================================================== +# pr_show + pr_activity +# =========================================================================== + + +class TestPrShow: + def test_get_url_and_id_validation(self) -> None: + opener = _CaptureOpener([_make_pr(42)]) + result = bb_ops.pr_show(_client(opener), "acme", "widget-service", 42) + assert result["id"] == 42 + assert opener.calls[0]["url"] == _prs_url() + "/42" + assert opener.calls[0]["method"] == "GET" + + def test_rejects_invalid_pr_id(self) -> None: + opener = _CaptureOpener([]) + # True/False included explicitly: bool is a subclass of int in + # Python, so a naive isinstance(x, int) check would accept them + # and stringify them in URLs as "True"/"False" — the regression + # this validator now 
defends against. + for bad in (0, -5, "42", 1.5, None, True, False): + with pytest.raises(ValueError, match="pr_id"): + bb_ops.pr_show( + _client(opener), "acme", "widget-service", bad # type: ignore[arg-type] + ) + assert opener.calls == [] + + +class TestPrActivity: + def test_walks_activity_endpoint(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [{"approval": {"user": {"display_name": "Bob"}}}], + } + ] + ) + result = bb_ops.pr_activity(_client(opener), "acme", "widget-service", 42) + assert len(result) == 1 + url = opener.calls[0]["url"] + assert url.startswith(_prs_url() + "/42/activity?") + assert "pagelen=50" in url + + def test_count_walks_pages(self) -> None: + # Same paginate-with-count semantics as prs_list; verify the + # behaviour symmetrically. + opener = _CaptureOpener( + [ + { + "values": [{"i": i} for i in range(100)], + "next": _prs_url() + "/42/activity?page=2", + }, + {"values": [{"i": i} for i in range(100, 150)]}, + ] + ) + result = bb_ops.pr_activity( + _client(opener), "acme", "widget-service", 42, count=150 + ) + assert len(result) == 150 + assert "pagelen=100" in opener.calls[0]["url"] + + def test_rejects_non_positive_count(self) -> None: + opener = _CaptureOpener([]) + for bad in (0, -1, True, False, "ten"): + with pytest.raises(ValueError, match="count"): + bb_ops.pr_activity( + _client(opener), + "acme", + "widget-service", + 42, + count=bad, # type: ignore[arg-type] + ) + assert opener.calls == [] + + def test_rejects_invalid_pr_id(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="pr_id"): + bb_ops.pr_activity(_client(opener), "acme", "widget-service", 0) + assert opener.calls == [] + + +# =========================================================================== +# pr_create +# =========================================================================== + + +class TestPrCreate: + def test_minimal_payload(self) -> None: + opener = _CaptureOpener([_make_pr(7)]) + bb_ops.pr_create( + 
_client(opener), + "acme", + "widget-service", + title="Add widget", + source_branch="feat/widget", + ) + call = opener.calls[0] + assert call["method"] == "POST" + assert call["url"] == _prs_url() + assert call["body"] == { + "title": "Add widget", + "source": {"branch": {"name": "feat/widget"}}, + "destination": {"branch": {"name": "main"}}, + "close_source_branch": True, + } + + def test_with_description_and_destination(self) -> None: + opener = _CaptureOpener([_make_pr(8)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="Add widget", + source_branch="feat/widget", + destination_branch="develop", + description="Adds the widget service.", + ) + body = opener.calls[0]["body"] + assert body["destination"] == {"branch": {"name": "develop"}} + assert body["description"] == "Adds the widget service." + + def test_close_source_branch_override(self) -> None: + opener = _CaptureOpener([_make_pr(9)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="Add widget", + source_branch="feat/widget", + close_source_branch=False, + ) + assert opener.calls[0]["body"]["close_source_branch"] is False + + def test_reviewers_payload_shape(self) -> None: + opener = _CaptureOpener([_make_pr(10)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="Add widget", + source_branch="feat/widget", + reviewers=["{alice-uuid}", "{bob-uuid}"], + ) + body = opener.calls[0]["body"] + # Bitbucket's contract: list of {"uuid": "..."} objects. + assert body["reviewers"] == [ + {"uuid": "{alice-uuid}"}, + {"uuid": "{bob-uuid}"}, + ] + + def test_empty_reviewers_omitted(self) -> None: + opener = _CaptureOpener([_make_pr(11)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="Add widget", + source_branch="feat/widget", + reviewers=[], + ) + # Empty list → no `reviewers` key (matches the empty-variables + # contract in pipeline_trigger). 
+ assert "reviewers" not in opener.calls[0]["body"] + + def test_empty_description_omitted(self) -> None: + opener = _CaptureOpener([_make_pr(12)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + ) + # Empty description → no `description` key. Bash includes it + # always (even empty); Python omits — flagged as 4.7 alignment. + assert "description" not in opener.calls[0]["body"] + + @pytest.mark.parametrize( + "field,value", + [ + # Empty string AND whitespace-only must both reject for every + # required string field. Without .strip(), whitespace-only + # values create degenerate PRs with visually-blank fields + # in any list view. + ("title", ""), + ("title", " "), + ("title", "\n\t"), + ("source_branch", ""), + ("source_branch", " "), + ("destination_branch", ""), + ("destination_branch", "\t"), + ], + ) + def test_rejects_empty_required_fields(self, field: str, value: str) -> None: + opener = _CaptureOpener([]) + kwargs = { + "title": "t", + "source_branch": "s", + "destination_branch": "main", + } + kwargs[field] = value + with pytest.raises(ValueError, match=field): + bb_ops.pr_create(_client(opener), "acme", "widget-service", **kwargs) + assert opener.calls == [] + + @pytest.mark.parametrize( + "bad_reviewer", + [ + "", # empty string + None, # non-string sentinel + 123, # non-string scalar + {"uuid": "alice-uuid"}, # the most plausible caller mistake — pre-shaping the payload + ], + ) + def test_rejects_invalid_reviewer(self, bad_reviewer: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="reviewer"): + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + reviewers=[bad_reviewer], + ) + assert opener.calls == [] + + def test_rejects_bare_string_reviewers(self) -> None: + """A bare string is technically an Iterable[str] (yields chars). 
+ Without the early-reject, `reviewers="alice-uuid"` would silently + produce `[{"uuid":"a"}, {"uuid":"l"}, {"uuid":"i"}, ...]`.""" + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="reviewers must be a list"): + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + reviewers="alice-uuid", # type: ignore[arg-type] + ) + assert opener.calls == [] + + def test_rejects_non_string_description(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="description"): + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + description={"foo": "bar"}, # type: ignore[arg-type] + ) + assert opener.calls == [] + + def test_rejects_non_bool_close_source_branch(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="close_source_branch"): + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + close_source_branch="yes", # type: ignore[arg-type] + ) + assert opener.calls == [] + + def test_whitespace_only_description_omitted(self) -> None: + opener = _CaptureOpener([_make_pr(13)]) + bb_ops.pr_create( + _client(opener), + "acme", + "widget-service", + title="t", + source_branch="s", + description=" \n\t ", + ) + # Whitespace-only descriptions don't carry information; omit + # rather than ship a meaningless empty-ish field. 
+ assert "description" not in opener.calls[0]["body"] + + +# =========================================================================== +# pr_approve / pr_unapprove +# =========================================================================== + + +class TestPrApprove: + def test_post_to_approve_endpoint(self) -> None: + opener = _CaptureOpener([{"approved": True, "user": {"display_name": "Alice"}}]) + result = bb_ops.pr_approve(_client(opener), "acme", "widget-service", 7) + assert result["approved"] is True + # The MCP layer now sees the response — bash discards it with > /dev/null. + call = opener.calls[0] + assert call["method"] == "POST" + assert call["url"] == _prs_url() + "/7/approve" + assert call["body"] is None + + +class TestPrUnapprove: + def test_delete_to_approve_endpoint(self) -> None: + # Bitbucket contract: DELETE the same /approve subpath that POST + # uses for approval. Not exposed by bash today (4.7 parity gap). + opener = _CaptureOpener([None]) # 204 No Content + result = bb_ops.pr_unapprove(_client(opener), "acme", "widget-service", 7) + assert result is None + call = opener.calls[0] + assert call["method"] == "DELETE" + assert call["url"] == _prs_url() + "/7/approve" + + +# =========================================================================== +# pr_merge +# =========================================================================== + + +class TestPrMerge: + def test_default_strategy_payload(self) -> None: + opener = _CaptureOpener([_make_pr(7, state="MERGED")]) + bb_ops.pr_merge(_client(opener), "acme", "widget-service", 7) + call = opener.calls[0] + # Mirror bash's PUT for the merge endpoint. Bitbucket has + # historically accepted both verbs and the bash side is the + # verified-working contract; aligning on one verb (likely POST, + # per current REST docs) is a 4.7 investigation. 
+ assert call["method"] == "PUT" + assert call["url"] == _prs_url() + "/7/merge" + assert call["body"] == { + "type": "pullrequest", + "merge_strategy": "merge_commit", + "close_source_branch": True, + } + + def test_each_strategy(self) -> None: + for strategy in ("merge_commit", "squash", "fast_forward"): + opener = _CaptureOpener([_make_pr(7, state="MERGED")]) + bb_ops.pr_merge( + _client(opener), "acme", "widget-service", 7, strategy=strategy + ) + assert opener.calls[0]["body"]["merge_strategy"] == strategy + + @pytest.mark.parametrize( + "bad_strategy", + [ + "rebase", # not in Bitbucket's set + "", # empty string + None, # non-string sentinel + 123, # non-string scalar + ["squash"], # unhashable type — would have raised TypeError + {"squash"}, # unhashable type + ], + ) + def test_rejects_invalid_strategy(self, bad_strategy: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="strategy"): + bb_ops.pr_merge( + _client(opener), + "acme", + "widget-service", + 7, + strategy=bad_strategy, + ) + assert opener.calls == [] + + def test_optional_message(self) -> None: + opener = _CaptureOpener([_make_pr(7, state="MERGED")]) + bb_ops.pr_merge( + _client(opener), + "acme", + "widget-service", + 7, + message="Custom merge message", + ) + assert opener.calls[0]["body"]["message"] == "Custom merge message" + + def test_close_source_branch_override(self) -> None: + opener = _CaptureOpener([_make_pr(7, state="MERGED")]) + bb_ops.pr_merge( + _client(opener), + "acme", + "widget-service", + 7, + close_source_branch=False, + ) + assert opener.calls[0]["body"]["close_source_branch"] is False + + def test_rejects_non_string_message(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="message"): + bb_ops.pr_merge( + _client(opener), + "acme", + "widget-service", + 7, + message=123, # type: ignore[arg-type] + ) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_message", ["", " ", "\n\t"]) + def 
test_rejects_empty_or_whitespace_message(self, bad_message: str) -> None: + # Symmetric with pr_comment_add: an empty (or whitespace-only) + # message would produce a blank merge-commit subject line, + # visually empty in any `git log --oneline` view. + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="message"): + bb_ops.pr_merge( + _client(opener), "acme", "widget-service", 7, message=bad_message + ) + assert opener.calls == [] + + def test_rejects_non_bool_close_source_branch(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="close_source_branch"): + bb_ops.pr_merge( + _client(opener), + "acme", + "widget-service", + 7, + close_source_branch="yes", # type: ignore[arg-type] + ) + assert opener.calls == [] + + +# =========================================================================== +# pr_decline +# =========================================================================== + + +class TestPrDecline: + def test_post_to_decline_endpoint(self) -> None: + opener = _CaptureOpener([_make_pr(7, state="DECLINED")]) + bb_ops.pr_decline(_client(opener), "acme", "widget-service", 7) + call = opener.calls[0] + assert call["method"] == "POST" + assert call["url"] == _prs_url() + "/7/decline" + assert call["body"] is None + + +# =========================================================================== +# Boundary-validation symmetry — every PR-id-taking op rejects bad IDs +# =========================================================================== + + +@pytest.mark.parametrize( + "fn,extra_args", + [ + (bb_ops.pr_approve, ()), + (bb_ops.pr_unapprove, ()), + (bb_ops.pr_decline, ()), + (bb_ops.pr_comments_list, ()), + (bb_ops.pr_comment_add, ("body",)), + # pr_merge has required strategy default; works with no extra args. 
+ (bb_ops.pr_merge, ()), + (bb_ops.pr_diff, ()), + (bb_ops.pr_show, ()), + (bb_ops.pr_activity, ()), + ], +) +def test_every_pr_op_rejects_bad_pr_id(fn: Any, extra_args: tuple[Any, ...]) -> None: + """If any future refactor removes `_validate_pr_id(pr_id)` from a + function, this catches it. Without this matrix the validator was + only directly tested on pr_show / pr_diff.""" + opener = _CaptureOpener([]) + for bad in (0, -5, True, False, "42", 1.5, None): + with pytest.raises(ValueError, match="pr_id"): + fn( + _client(opener), + "acme", + "widget-service", + bad, + *extra_args, + ) + assert opener.calls == [] + + +# =========================================================================== +# pr_diff +# =========================================================================== + + +class TestPrDiff: + def test_returns_raw_diff_text(self) -> None: + diff_body = ( + "diff --git a/widget.py b/widget.py\n" + "+++ b/widget.py\n" + "@@ -1 +1,2 @@\n" + "+# new line\n" + ) + opener = _CaptureOpener([diff_body.encode("utf-8")]) + result = bb_ops.pr_diff(_client(opener), "acme", "widget-service", 42) + assert result == diff_body + assert opener.calls[0]["url"] == _prs_url() + "/42/diff" + assert opener.calls[0]["method"] == "GET" + # The first hop must carry Authorization (we own the request to + # api.bitbucket.org). A regression that wired pr_diff to a + # no-auth path would silently break authenticated diff fetches. + assert opener.calls[0]["headers"]["Authorization"].startswith("Basic ") + + def test_invalid_pr_id(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="pr_id"): + bb_ops.pr_diff(_client(opener), "acme", "widget-service", 0) + assert opener.calls == [] + + def test_follows_cross_host_redirect_and_strips_auth(self) -> None: + """If Bitbucket ever introduces a redirect on the diff endpoint + (the current behaviour is direct 200), the cross-host-auth-strip + protection from fetch_redirected_text must apply. 
A regression + that wired pr_diff to plain client.get would 5xx on any redirect + (default opener refuses 3xx) — different from the bash-side + failure mode but equally surprising. Pin the safe behaviour here + rather than only in test_bb_api. + + Models the same shape as the pipeline_logs S3-redirect test.""" + remote_url = "https://diff-cache.example.com/acme/widget-service/42.diff?sig=abc" + redirect = urllib.error.HTTPError( + url=DEFAULT_API_BASE + "/repositories/acme/widget-service/pullrequests/42/diff", + code=307, + msg="Temporary Redirect", + hdrs={"Location": remote_url}, # type: ignore[arg-type] + fp=__import__("io").BytesIO(b""), + ) + opener = _CaptureOpener( + [ + redirect, + b"diff body from remote\n", + ] + ) + result = bb_ops.pr_diff(_client(opener), "acme", "widget-service", 42) + assert result == "diff body from remote\n" + assert len(opener.calls) == 2 + # First hop MUST carry Authorization (we own the request to + # api.bitbucket.org). A regression that dropped auth on the + # initial request would otherwise pass silently. + assert opener.calls[0]["headers"]["Authorization"].startswith("Basic ") + # And the Location header was actually followed (not refetched + # original URL). + assert opener.calls[1]["url"] == remote_url + # Second hop must NOT carry the Bitbucket credential. 
+ assert "Authorization" not in opener.calls[1]["headers"], ( + "Bitbucket Basic auth leaked to the diff-cache host" + ) + + +# =========================================================================== +# pr_comments_list + pr_comment_add +# =========================================================================== + + +class TestPrCommentsList: + def test_walks_comments_endpoint(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [ + {"id": 1, "content": {"raw": "LGTM"}, "user": {"display_name": "Alice"}}, + {"id": 2, "content": {"raw": "nit on line 3"}, "user": {"display_name": "Bob"}}, + ] + } + ] + ) + result = bb_ops.pr_comments_list( + _client(opener), "acme", "widget-service", 42 + ) + assert [c["id"] for c in result] == [1, 2] + url = opener.calls[0]["url"] + assert url.startswith(_prs_url() + "/42/comments?") + assert "pagelen=100" in url + + def test_rejects_non_positive_count(self) -> None: + opener = _CaptureOpener([]) + for bad in (0, -1, True, False, "ten"): + with pytest.raises(ValueError, match="count"): + bb_ops.pr_comments_list( + _client(opener), + "acme", + "widget-service", + 42, + count=bad, # type: ignore[arg-type] + ) + assert opener.calls == [] + + +class TestPrCommentAdd: + def test_posts_comment_body_in_content_raw(self) -> None: + opener = _CaptureOpener([{"id": 99, "content": {"raw": "Looks good."}}]) + result = bb_ops.pr_comment_add( + _client(opener), "acme", "widget-service", 42, "Looks good." + ) + assert result["id"] == 99 + call = opener.calls[0] + assert call["method"] == "POST" + assert call["url"] == _prs_url() + "/42/comments" + # Bitbucket's contract: {"content": {"raw": "<text>"}}. 
+ assert call["body"] == {"content": {"raw": "Looks good."}} + + @pytest.mark.parametrize("bad_body", ["", " ", "\n\t"]) + def test_rejects_empty_or_whitespace_body(self, bad_body: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="body"): + bb_ops.pr_comment_add( + _client(opener), "acme", "widget-service", 42, bad_body + ) + assert opener.calls == [] + + def test_rejects_non_string_body(self) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="body"): + bb_ops.pr_comment_add( + _client(opener), + "acme", + "widget-service", + 42, + None, # type: ignore[arg-type] + ) + assert opener.calls == [] diff --git a/tests/test_bb_ops_repos.py b/tests/test_bb_ops_repos.py new file mode 100644 index 0000000..80d50ce --- /dev/null +++ b/tests/test_bb_ops_repos.py @@ -0,0 +1,444 @@ +""" +Tests for bb_ops repos / branches / vars / downloads / commits operations. + +Same discipline as the pipelines and PRs test files: assert URL + +method + body shape per HTTP touchpoint; never just response value. +Boundary-validation rejections assert `opener.calls == []` to prove no +network IO happened on bad input. + +All fixture data is fictional (acme / widget-service / alice / bob). 
+""" + +from __future__ import annotations + +import json +import urllib.request +from typing import Any + +import pytest + +import bb_ops +from bb_api import BBClient, BBConfig, DEFAULT_API_BASE + + +# --------------------------------------------------------------------------- +# Test scaffolding (duplicated across test files for end-to-end readability) +# --------------------------------------------------------------------------- + + +class _CaptureOpener: + def __init__(self, responses: list[Any]): + self.responses = list(responses) + self.calls: list[dict[str, Any]] = [] + + def open(self, req: urllib.request.Request, timeout: float = 30.0) -> Any: + body = req.data + normalised_headers = {k.title(): v for k, v in req.header_items()} + self.calls.append( + { + "url": req.full_url, + "method": req.get_method(), + "headers": normalised_headers, + "body": json.loads(body.decode("utf-8")) if body else None, + "timeout": timeout, + } + ) + if not self.responses: + raise AssertionError( + f"opener received an unexpected request: " + f"{req.get_method()} {req.full_url}" + ) + resp = self.responses.pop(0) + if isinstance(resp, BaseException): + raise resp + if resp is None: + body_bytes: bytes = b"" + elif isinstance(resp, (bytes, bytearray)): + body_bytes = bytes(resp) + else: + body_bytes = json.dumps(resp).encode("utf-8") + return _FakeResponse(body_bytes) + + +class _FakeResponse: + def __init__(self, body: bytes): + self._body = body + + def read(self) -> bytes: + return self._body + + def __enter__(self) -> "_FakeResponse": + return self + + def __exit__(self, *_: Any) -> None: + pass + + +def _client(opener: _CaptureOpener) -> BBClient: + cfg = BBConfig( + user="alice@example.com", + token="tok-xyz", + workspace="acme", + api_base=DEFAULT_API_BASE, + ) + return BBClient(cfg, opener=opener) + + +def _repo_url() -> str: + return DEFAULT_API_BASE + "/repositories/acme/widget-service" + + +# =========================================================================== 
+# repos_list +# =========================================================================== + + +class TestReposList: + def test_default_workspace_from_client(self) -> None: + # workspace=None should default to client.config.workspace ("acme"). + opener = _CaptureOpener([{"values": [{"slug": "widget-service"}]}]) + result = bb_ops.repos_list(_client(opener)) + assert len(result) == 1 + url = opener.calls[0]["url"] + assert url.startswith(DEFAULT_API_BASE + "/repositories/acme?") + assert "sort=-updated_on" in url + assert "pagelen=100" in url + + def test_explicit_workspace_override(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.repos_list(_client(opener), workspace="other-org") + assert opener.calls[0]["url"].startswith( + DEFAULT_API_BASE + "/repositories/other-org?" + ) + + def test_count_walks_pages(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [{"slug": f"repo-{i}"} for i in range(100)], + "next": DEFAULT_API_BASE + "/repositories/acme?page=2", + }, + {"values": [{"slug": f"repo-{i}"} for i in range(100, 250)]}, + ] + ) + result = bb_ops.repos_list(_client(opener), count=250) + assert len(result) == 250 + assert "pagelen=100" in opener.calls[0]["url"] + + def test_query_filter(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.repos_list(_client(opener), query='name ~ "widget"') + # urlencode emits `q=name+~+%22widget%22`: spaces -> `+`, `~` + # stays as-is (unreserved), `"` -> `%22`. Assert the exact form + # so a regression in either the validator or the urlencode + # behaviour is visible. 
+ url = opener.calls[0]["url"] + assert "q=name+~+%22widget%22" in url + + @pytest.mark.parametrize("bad", [0, -1, True, False, "ten"]) + def test_rejects_non_positive_count(self, bad: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="count"): + bb_ops.repos_list(_client(opener), count=bad) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_workspace", ["", " ", "\n\t"]) + def test_rejects_empty_workspace(self, bad_workspace: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="workspace"): + bb_ops.repos_list(_client(opener), workspace=bad_workspace) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_workspace", ["acme/widget", "a/b/c"]) + def test_rejects_workspace_with_slash(self, bad_workspace: str) -> None: + """Without this, `workspace="acme/widget"` would silently build + a single-repo endpoint URL and paginate against a response that + lacks `values` — confusing failure. Symmetric with bb_api.repo_path.""" + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="workspace.*'/'"): + bb_ops.repos_list(_client(opener), workspace=bad_workspace) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_workspace", [".", ".."]) + def test_rejects_workspace_dot_segments(self, bad_workspace: str) -> None: + """Path-traversal defense — `/repositories/../widget` after URL + normalisation could resolve to the wrong workspace.""" + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match=r"'\.'|'\.\.'"): + bb_ops.repos_list(_client(opener), workspace=bad_workspace) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_query", ["", " "]) + def test_rejects_empty_query(self, bad_query: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="query"): + bb_ops.repos_list(_client(opener), query=bad_query) + assert opener.calls == [] + + +# =========================================================================== +# repo_show 
+# =========================================================================== + + +class TestRepoShow: + def test_fetches_repo_metadata(self) -> None: + opener = _CaptureOpener( + [{"full_name": "acme/widget-service", "language": "python"}] + ) + result = bb_ops.repo_show(_client(opener), "acme", "widget-service") + assert result["language"] == "python" + call = opener.calls[0] + assert call["url"] == _repo_url() + assert call["method"] == "GET" + + +# =========================================================================== +# branches_list + branch_show +# =========================================================================== + + +class TestBranchesList: + def test_default_sort_and_pagelen(self) -> None: + opener = _CaptureOpener([{"values": [{"name": "main"}, {"name": "develop"}]}]) + result = bb_ops.branches_list(_client(opener), "acme", "widget-service") + assert [b["name"] for b in result] == ["main", "develop"] + url = opener.calls[0]["url"] + assert url.startswith(_repo_url() + "/refs/branches?") + assert "sort=-target.date" in url + assert "pagelen=50" in url + + def test_count_walks_pages(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [{"name": f"branch-{i}"} for i in range(100)], + "next": DEFAULT_API_BASE + "/x?page=2", + }, + {"values": [{"name": f"branch-{i}"} for i in range(100, 200)]}, + ] + ) + result = bb_ops.branches_list( + _client(opener), "acme", "widget-service", count=200 + ) + assert len(result) == 200 + + def test_query_filter(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.branches_list( + _client(opener), "acme", "widget-service", query='name ~ "feat"' + ) + # Exact-form assertion (symmetric with TestReposList.test_query_filter): + # a regression that silently mangled or dropped the BBQL string + # would otherwise pass a `"q=" in url` weak check. 
+ assert "q=name+~+%22feat%22" in opener.calls[0]["url"] + + @pytest.mark.parametrize("bad", [0, -1, True, False]) + def test_rejects_non_positive_count(self, bad: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="count"): + bb_ops.branches_list( + _client(opener), "acme", "widget-service", count=bad + ) + assert opener.calls == [] + + @pytest.mark.parametrize("bad_query", ["", " "]) + def test_rejects_empty_query(self, bad_query: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="query"): + bb_ops.branches_list( + _client(opener), "acme", "widget-service", query=bad_query + ) + assert opener.calls == [] + + +class TestBranchShow: + def test_fetches_single_branch(self) -> None: + opener = _CaptureOpener([{"name": "main", "target": {"hash": "abc123"}}]) + result = bb_ops.branch_show( + _client(opener), "acme", "widget-service", "main" + ) + assert result["name"] == "main" + assert opener.calls[0]["url"] == _repo_url() + "/refs/branches/main" + + def test_url_encodes_slash_in_branch_name(self) -> None: + # feat/widget would otherwise be interpreted as a sub-resource + # path; must URL-encode the slash. + opener = _CaptureOpener([{"name": "feat/widget"}]) + bb_ops.branch_show( + _client(opener), "acme", "widget-service", "feat/widget" + ) + assert opener.calls[0]["url"] == _repo_url() + "/refs/branches/feat%2Fwidget" + + @pytest.mark.parametrize("bad_name", ["", " ", "\n"]) + def test_rejects_empty_name(self, bad_name: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="name"): + bb_ops.branch_show( + _client(opener), "acme", "widget-service", bad_name + ) + assert opener.calls == [] + + def test_strips_whitespace_around_branch_name(self) -> None: + # Caller might pass branch name with stray whitespace from copy-paste. + # We strip but do not silently accept whitespace-only (rejected above). 
+ opener = _CaptureOpener([{"name": "main"}]) + bb_ops.branch_show( + _client(opener), "acme", "widget-service", " main " + ) + assert opener.calls[0]["url"] == _repo_url() + "/refs/branches/main" + + +# =========================================================================== +# vars_list +# =========================================================================== + + +class TestVarsList: + def test_lists_pipeline_variables(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [ + {"key": "DEPLOY_TAG", "secured": False, "value": "latest"}, + {"key": "BB_TOKEN", "secured": True, "value": None}, + ] + } + ] + ) + result = bb_ops.vars_list(_client(opener), "acme", "widget-service") + assert [v["key"] for v in result] == ["DEPLOY_TAG", "BB_TOKEN"] + # Secured value comes back as None from the API; we don't mask + # at this layer (the MCP tool surfaces the `secured` flag). + assert result[1]["value"] is None + url = opener.calls[0]["url"] + assert url.startswith(_repo_url() + "/pipelines_config/variables/?") + assert "pagelen=100" in url + + @pytest.mark.parametrize("bad", [0, -1, True, False]) + def test_rejects_non_positive_count(self, bad: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="count"): + bb_ops.vars_list(_client(opener), "acme", "widget-service", count=bad) + assert opener.calls == [] + + +# =========================================================================== +# downloads_list +# =========================================================================== + + +class TestDownloadsList: + def test_lists_downloads(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [ + {"name": "release-v1.0.zip", "size": 102400}, + {"name": "install.sh", "size": 5120}, + ] + } + ] + ) + result = bb_ops.downloads_list( + _client(opener), "acme", "widget-service" + ) + assert [d["name"] for d in result] == ["release-v1.0.zip", "install.sh"] + url = opener.calls[0]["url"] + assert url.startswith(_repo_url() + 
"/downloads?") + assert "pagelen=25" in url + + @pytest.mark.parametrize("bad", [0, -1, True, False]) + def test_rejects_non_positive_count(self, bad: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="count"): + bb_ops.downloads_list( + _client(opener), "acme", "widget-service", count=bad + ) + assert opener.calls == [] + + +# =========================================================================== +# commits_list +# =========================================================================== + + +class TestCommitsList: + def test_all_branches_when_branch_none(self) -> None: + opener = _CaptureOpener( + [ + { + "values": [ + {"hash": "abc1", "message": "Commit 1"}, + {"hash": "abc2", "message": "Commit 2"}, + ] + } + ] + ) + result = bb_ops.commits_list( + _client(opener), "acme", "widget-service", count=2 + ) + assert [c["hash"] for c in result] == ["abc1", "abc2"] + url = opener.calls[0]["url"] + # Without branch, hits /commits (not /commits/{branch}). + assert url.startswith(_repo_url() + "/commits?") + assert "pagelen=2" in url + + def test_specific_branch(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.commits_list( + _client(opener), "acme", "widget-service", branch="main" + ) + assert opener.calls[0]["url"].startswith(_repo_url() + "/commits/main?") + + def test_branch_name_with_slash_is_url_encoded(self) -> None: + opener = _CaptureOpener([{"values": []}]) + bb_ops.commits_list( + _client(opener), + "acme", + "widget-service", + branch="feat/widget", + ) + assert opener.calls[0]["url"].startswith( + _repo_url() + "/commits/feat%2Fwidget?" 
+ ) + + @pytest.mark.parametrize("bad_branch", ["", " ", "\n"]) + def test_rejects_empty_branch(self, bad_branch: str) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="branch"): + bb_ops.commits_list( + _client(opener), + "acme", + "widget-service", + branch=bad_branch, + ) + assert opener.calls == [] + + @pytest.mark.parametrize("bad", [0, -1, True, False]) + def test_rejects_non_positive_count(self, bad: Any) -> None: + opener = _CaptureOpener([]) + with pytest.raises(ValueError, match="count"): + bb_ops.commits_list( + _client(opener), "acme", "widget-service", count=bad + ) + assert opener.calls == [] + + def test_count_walks_pages(self) -> None: + """commits_list has the most complex path shape (branch vs + no-branch); pin its pagination behaviour symmetrically with + the other list ops.""" + opener = _CaptureOpener( + [ + { + "values": [{"hash": f"c{i:03}"} for i in range(100)], + "next": _repo_url() + "/commits?page=2", + }, + {"values": [{"hash": f"c{i:03}"} for i in range(100, 175)]}, + ] + ) + result = bb_ops.commits_list( + _client(opener), "acme", "widget-service", count=175 + ) + assert len(result) == 175 + assert "pagelen=100" in opener.calls[0]["url"] diff --git a/tests/test_git_ops.py b/tests/test_git_ops.py new file mode 100644 index 0000000..8733c20 --- /dev/null +++ b/tests/test_git_ops.py @@ -0,0 +1,1066 @@ +""" +Tests for git_ops. + +Same discipline as bb_api / bb_ops tests: assert the exact subprocess +invocation (command + args + cwd + kwargs) AND the parsing of canned +realistic git output. Without the subprocess-shape assertions a future +refactor that swapped `git status --porcelain=v2` for the v1 format +would produce different parse results but the parser tests would still +pass on their canned input — exactly the kind of regression the test +discipline is meant to catch. + +All fixture data is fictional (workspace `acme`, repo `widget-service`, +authors `alice` / `bob`). 
+""" + +from __future__ import annotations + +import subprocess +from types import SimpleNamespace +from typing import Any + +import pytest + +import git_ops +from git_ops import GIT_PARSE_ERROR_RETURNCODE, GitOpError + + +# --------------------------------------------------------------------------- +# Subprocess scaffolding +# --------------------------------------------------------------------------- + + +class _RecordingRunner: + """Stand-in for the `subprocess` module that records every .run() + call and returns canned (returncode, stdout, stderr) per invocation. + + Use the same instance across a single test so we can assert on the + full sequence of git commands a function issues (some functions + like git_uncommitted_changes shell out three times).""" + + def __init__(self, responses: list[tuple[int, str, str]]): + self.responses = list(responses) + self.calls: list[dict[str, Any]] = [] + + def run(self, args: Any, **kwargs: Any) -> Any: + self.calls.append({"args": args, "kwargs": kwargs}) + if not self.responses: + raise AssertionError( + f"runner received an unexpected call: {args!r}" + ) + returncode, stdout, stderr = self.responses.pop(0) + return SimpleNamespace(returncode=returncode, stdout=stdout, stderr=stderr) + + +class _MissingGitRunner: + """Stand-in that raises FileNotFoundError on .run() — simulates + `git` not being on PATH. 
Tests use this to verify the error wrap.""" + + @staticmethod + def run(*_args: Any, **_kwargs: Any) -> Any: + e = FileNotFoundError("[Errno 2] No such file or directory: 'git'") + e.filename = "git" + raise e + + +class _MissingCwdRunner: + """Stand-in that raises FileNotFoundError with the cwd as the + filename — simulates `path=/no/such/dir` passed to a git wrapper.""" + + def __init__(self, cwd: str): + self.cwd = cwd + + def run(self, *_args: Any, **kwargs: Any) -> Any: + e = FileNotFoundError(f"[Errno 2] No such file or directory: '{self.cwd}'") + e.filename = kwargs.get("cwd") or self.cwd + raise e + + +class _NotADirRunner: + """Stand-in that raises NotADirectoryError — simulates passing a + regular file (e.g. /etc/passwd) as path=.""" + + def __init__(self, cwd: str): + self.cwd = cwd + + def run(self, *_args: Any, **kwargs: Any) -> Any: + e = NotADirectoryError(f"[Errno 20] Not a directory: '{self.cwd}'") + e.filename = kwargs.get("cwd") or self.cwd + raise e + + +class _PermissionRunner: + """Stand-in that raises PermissionError — simulates cwd unreadable + or git binary lacking +x.""" + + def __init__(self, filename: str): + self.filename = filename + + def run(self, *_args: Any, **kwargs: Any) -> Any: + e = PermissionError(f"[Errno 13] Permission denied: '{self.filename}'") + e.filename = self.filename + raise e + + +# =========================================================================== +# git_current_branch +# =========================================================================== + + +class TestGitCurrentBranch: + def test_returns_branch_name(self) -> None: + runner = _RecordingRunner([(0, "feat/widget\n", "")]) + assert git_ops.git_current_branch(runner=runner) == "feat/widget" + # Assert the exact subprocess shape — `rev-parse --abbrev-ref HEAD` + # is the contract. 
A regression to `git branch --show-current` + # (different command, different edge-case behaviour on detached HEAD) + # would silently pass the canned-output test without this. + # The wrapper prepends `-c color.ui=never` to disable ANSI escapes + # in any output (relevant for diff/log paths, prepended uniformly + # for consistency). + assert runner.calls[0]["args"] == [ + "git", + "-c", + "color.ui=never", + "rev-parse", + "--abbrev-ref", + "HEAD", + ] + kwargs = runner.calls[0]["kwargs"] + assert kwargs["capture_output"] is True + assert kwargs["text"] is True + assert kwargs["check"] is False + # Explicit UTF-8 + replace on decode errors so non-ASCII filenames + # or author names don't crash inside subprocess.run on a + # locale-restricted host. + assert kwargs["encoding"] == "utf-8" + assert kwargs["errors"] == "replace" + # Timeout so a wedged git can't hang the MCP server. + assert kwargs["timeout"] == git_ops._GIT_SUBPROCESS_TIMEOUT + # stdin=DEVNULL so a credential prompt fails immediately with + # EOF rather than blocking on inherited stdin. + assert kwargs["stdin"] == subprocess.DEVNULL + # GIT_TERMINAL_PROMPT=0 + GIT_ASKPASS="" in the environment so + # git itself refuses to prompt (defence in depth alongside + # stdin=DEVNULL). + env = kwargs["env"] + assert env["GIT_TERMINAL_PROMPT"] == "0" + assert env["GIT_ASKPASS"] == "" + + def test_passes_cwd_when_path_given(self) -> None: + runner = _RecordingRunner([(0, "main\n", "")]) + git_ops.git_current_branch(path="/some/dir", runner=runner) + assert runner.calls[0]["kwargs"]["cwd"] == "/some/dir" + + def test_detached_head_returns_literal_string(self) -> None: + # On detached HEAD, `git rev-parse --abbrev-ref HEAD` returns "HEAD". + # Callers that need to detect detached state check for this literal; + # we don't special-case it inside the function. 
+ runner = _RecordingRunner([(0, "HEAD\n", "")]) + assert git_ops.git_current_branch(runner=runner) == "HEAD" + + def test_non_git_dir_raises_giterror(self) -> None: + runner = _RecordingRunner( + [(128, "", "fatal: not a git repository (or any of the parent directories): .git\n")] + ) + with pytest.raises(GitOpError, match="not a git repository"): + git_ops.git_current_branch(runner=runner) + + def test_missing_git_binary_raises_giterror(self) -> None: + with pytest.raises(GitOpError, match="git executable not found"): + git_ops.git_current_branch(runner=_MissingGitRunner) + + def test_missing_cwd_raises_distinct_error(self) -> None: + """When `path=` points to a non-existent directory, + subprocess.run raises FileNotFoundError with e.filename set to + the cwd. Disambiguate from the missing-git case so the agent + sees the actual cause instead of chasing a PATH config.""" + with pytest.raises(GitOpError, match="path does not exist"): + git_ops.git_current_branch( + path="/no/such/dir", + runner=_MissingCwdRunner("/no/such/dir"), + ) + + def test_path_is_regular_file_raises_giterror(self) -> None: + """`path=/etc/passwd` (regular file) → NotADirectoryError, which + the round-5 fix wraps as GitOpError so callers see the uniform + contract instead of a raw OSError leak.""" + with pytest.raises(GitOpError, match="path is not a directory"): + git_ops.git_current_branch( + path="/etc/passwd", + runner=_NotADirRunner("/etc/passwd"), + ) + + def test_permission_denied_raises_giterror(self) -> None: + """PermissionError (cwd unreadable / git binary not +x) wraps as + GitOpError so the agent sees the uniform contract.""" + with pytest.raises(GitOpError, match="permission denied"): + git_ops.git_current_branch( + path="/restricted", + runner=_PermissionRunner("/restricted"), + ) + + def test_parse_error_returncode_outside_signal_range(self) -> None: + """GIT_PARSE_ERROR_RETURNCODE = -1000 is outside Python's + signal-killed convention (subprocess uses -N for signal N, e.g. 
+ -1 for SIGHUP, -9 for SIGKILL, -15 for SIGTERM). Picking the + sentinel outside that range means callers branching on + `err.returncode == GIT_PARSE_ERROR_RETURNCODE` won't + misclassify a SIGHUP-killed git as a parse failure.""" + # No signal goes below -64 in practice; sentinel at -1000 is + # safely outside. + assert git_ops.GIT_PARSE_ERROR_RETURNCODE < -100 + + def test_timeout_raises_giterror_with_parse_returncode(self) -> None: + """A wedged git (credential-helper prompting on stdin, held + index.lock, NFS server gone) would otherwise hang the MCP + server thread. subprocess.TimeoutExpired must be wrapped as + GitOpError so callers see the uniform error surface.""" + + class _TimeoutRunner: + @staticmethod + def run(*_args: Any, **kwargs: Any) -> Any: + raise subprocess.TimeoutExpired( + cmd=kwargs.get("args") or "git", + timeout=kwargs.get("timeout", 30.0), + ) + + with pytest.raises(GitOpError, match="timed out") as exc: + git_ops.git_current_branch(runner=_TimeoutRunner) + # Timeout uses the parse-error sentinel (no real git exit code). + assert exc.value.returncode == GIT_PARSE_ERROR_RETURNCODE + + def test_empty_stdout_raises_with_parse_returncode(self) -> None: + # rev-parse should never return empty on a healthy repo; if it + # does, fail loud rather than returning "" as a branch name. + # The sentinel returncode (-1000) distinguishes parse failure from + # git's own exit codes for callers branching on returncode. + runner = _RecordingRunner([(0, "\n", "")]) + with pytest.raises(GitOpError, match="empty branch name") as exc: + git_ops.git_current_branch(runner=runner) + assert exc.value.returncode == GIT_PARSE_ERROR_RETURNCODE + assert exc.value.returncode < 0 # any real git exit is >= 0 + # err.command must include the `-c color.ui=never` prefix that + # _run_git actually passed to subprocess — otherwise a caller + # introspecting err.command (or reproducing the failing call + # from the message) sees a lie. 
+ assert "-c" in exc.value.command + assert "color.ui=never" in exc.value.command + + +# =========================================================================== +# git_remote_repo +# =========================================================================== + + +class TestGitRemoteRepo: + @pytest.mark.parametrize( + "url,expected", + [ + ("https://bitbucket.org/acme/widget-service.git\n", ("acme", "widget-service")), + ("git@bitbucket.org:acme/widget-service.git\n", ("acme", "widget-service")), + ("https://alice@bitbucket.org/acme/widget-service\n", ("acme", "widget-service")), + # Self-hosted Bitbucket Server — parser is intentionally loose, + # matches bb_api.parse_remote_url's documented contract. + ("https://bitbucket.example.com/acme/widget-service.git\n", ("acme", "widget-service")), + ], + ) + def test_parses_known_remote_shapes( + self, url: str, expected: tuple[str, str] + ) -> None: + runner = _RecordingRunner([(0, url, "")]) + assert git_ops.git_remote_repo(runner=runner) == expected + + def test_subprocess_shape(self) -> None: + runner = _RecordingRunner( + [(0, "https://bitbucket.org/acme/widget-service.git\n", "")] + ) + git_ops.git_remote_repo(runner=runner) + assert runner.calls[0]["args"] == [ + "git", + "-c", + "color.ui=never", + "remote", + "get-url", + "origin", + ] + + def test_passes_cwd_when_path_given(self) -> None: + runner = _RecordingRunner( + [(0, "https://bitbucket.org/acme/widget-service.git\n", "")] + ) + git_ops.git_remote_repo(path="/work/dir", runner=runner) + assert runner.calls[0]["kwargs"]["cwd"] == "/work/dir" + + def test_no_origin_remote_raises(self) -> None: + runner = _RecordingRunner( + [(128, "", "error: No such remote 'origin'\n")] + ) + with pytest.raises(GitOpError, match="No such remote"): + git_ops.git_remote_repo(runner=runner) + + def test_unparseable_url_raises_with_parse_returncode(self) -> None: + runner = _RecordingRunner([(0, "not-a-url\n", "")]) + with pytest.raises(GitOpError, match="could not parse") 
as exc: + git_ops.git_remote_repo(runner=runner) + # Parse failure: git exited 0 but we couldn't make sense of the + # output. Sentinel returncode distinguishes this from a real git + # failure (which carries git's own non-zero exit code). + assert exc.value.returncode == GIT_PARSE_ERROR_RETURNCODE + + def test_unparseable_url_redacts_embedded_credentials(self) -> None: + """If the unparseable URL carries `user:token@` embedded auth + (common in CI: https://x-token-auth:abcd@.../), the secret + must NOT land in the error message — it would flow through + MCP into agent context and downstream logs.""" + # Construct an unparseable URL (parse_remote_url's regex needs + # a `[:/]X/Y` tail; "host-only" doesn't match). + sensitive = "https://x-token-auth:supersecret123@bitbucket.org\n" + runner = _RecordingRunner([(0, sensitive, "")]) + with pytest.raises(GitOpError) as exc: + git_ops.git_remote_repo(runner=runner) + msg = str(exc.value) + assert "supersecret123" not in msg + assert "x-token-auth" not in msg + assert "[redacted]" in msg + # err.command also reflects the actual invocation including + # `-c color.ui=never`. + assert "-c" in exc.value.command + assert "color.ui=never" in exc.value.command + + def test_redaction_handles_at_sign_in_password(self) -> None: + """Round-5 regression: a password containing a literal `@` + (legal in RFC 3986 syntax) caused the old `[^/@]+@` regex to + stop at the first `@`, leaking the tail of the credential. + The fixed `[^/]+@` regex is greedy up to the last `@` before + the path.""" + sensitive = "https://x-token-auth:my@token@bitbucket.org\n" + runner = _RecordingRunner([(0, sensitive, "")]) + with pytest.raises(GitOpError) as exc: + git_ops.git_remote_repo(runner=runner) + msg = str(exc.value) + # Neither half of the credential should survive. 
+ assert "my" not in msg or "@token" not in msg # `my@token` fragment + assert "token" not in msg + assert "x-token-auth" not in msg + assert "[redacted]" in msg + + +# =========================================================================== +# git_status (parser + driver) +# =========================================================================== + + +# Realistic `git status --porcelain=v2 --branch --untracked-files=normal` +# captures, exercised against the parser. + +STATUS_CLEAN = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab +0 -0 +""" + +STATUS_AHEAD = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head feat/widget +# branch.upstream origin/feat/widget +# branch.ab +3 -1 +""" + +STATUS_NO_UPSTREAM = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head feat/local-only +""" + +STATUS_MIXED_CHANGES = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab +0 -0 +1 M. N... 100644 100644 100644 hash1 hash1 staged_file.py +1 .M N... 100644 100644 100644 hash1 hash1 modified_file.py +1 MM N... 100644 100644 100644 hash1 hash1 both_staged_and_modified.py +? untracked.tmp +? docs/new_note.md +""" + +STATUS_RENAMED = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab +0 -0 +2 R. N... 100644 100644 100644 hash1 hash1 R100 new_name.py\told_name.py +""" + +STATUS_UNMERGED = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab +0 -0 +u UU N... 100644 100644 100644 100644 hash1 hash2 hash3 conflict.py +""" + +# Type-1 line with spaces in the filename. The path is the 9th token +# (everything from index 8 onward) so split(" ", 8) preserves the spaces. 
+STATUS_SPACE_IN_FILENAME = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab +0 -0 +1 .M N... 100644 100644 100644 hash1 hash1 docs/My Cool File.md +""" + +# Porcelain v2 emits "(detached)" for detached HEAD. The parser +# normalises this to "HEAD" so it matches what git_current_branch +# returns for the same state — cross-checks between the two never +# disagree on the same underlying state. +STATUS_DETACHED = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head (detached) +""" + +# A malformed branch.ab line (negative ahead, positive behind — never +# emitted by real git but defensive). The parser should ignore it +# (leave ahead/behind at 0). +STATUS_BAD_AB = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +# branch.upstream origin/main +# branch.ab -3 +1 +""" + +# Type-1 line with a single-char XY field (corrupted output). The +# parser must skip rather than IndexError. +STATUS_CORRUPT_XY = """\ +# branch.oid 0a1b2c3d4e5f6789abcdef0123456789abcdef01 +# branch.head main +1 X N... 100644 100644 100644 hash1 hash1 single_char_xy.py +""" + +# Freshly `git init`'d repo with no commits — branch.head reports the +# would-be branch (e.g. "main") but branch.oid is "(initial)" signaling +# unborn state. Normalise to "HEAD" for symmetry with the detached-HEAD +# convention. +STATUS_UNBORN = """\ +# branch.oid (initial) +# branch.head main +? 
README.md +""" + + +class TestGitStatusParser: + def test_clean_tree(self) -> None: + s = git_ops._parse_status_porcelain_v2(STATUS_CLEAN) + assert s["branch"] == "main" + assert s["upstream"] == "origin/main" + assert s["ahead"] == 0 + assert s["behind"] == 0 + assert s["clean"] is True + assert s["staged"] == [] + assert s["modified"] == [] + assert s["untracked"] == [] + assert s["unmerged"] == [] + + def test_ahead_behind(self) -> None: + s = git_ops._parse_status_porcelain_v2(STATUS_AHEAD) + assert s["ahead"] == 3 + assert s["behind"] == 1 + assert s["clean"] is True # ahead/behind alone doesn't make tree dirty + + def test_no_upstream(self) -> None: + s = git_ops._parse_status_porcelain_v2(STATUS_NO_UPSTREAM) + assert s["branch"] == "feat/local-only" + assert s["upstream"] is None + assert s["ahead"] == 0 + assert s["behind"] == 0 + + def test_mixed_changes(self) -> None: + s = git_ops._parse_status_porcelain_v2(STATUS_MIXED_CHANGES) + assert s["staged"] == ["staged_file.py", "both_staged_and_modified.py"] + assert s["modified"] == ["modified_file.py", "both_staged_and_modified.py"] + assert s["untracked"] == ["untracked.tmp", "docs/new_note.md"] + assert s["clean"] is False + + def test_renamed_keeps_new_path(self) -> None: + # Renamed entries in porcelain=v2 use a tab to separate new from old; + # we keep the new path only (matches what `git status` displays + # by default). + s = git_ops._parse_status_porcelain_v2(STATUS_RENAMED) + assert s["staged"] == ["new_name.py"] + assert s["clean"] is False + + def test_unmerged(self) -> None: + s = git_ops._parse_status_porcelain_v2(STATUS_UNMERGED) + assert s["unmerged"] == ["conflict.py"] + assert s["clean"] is False + + def test_spaces_in_filename_preserved(self) -> None: + """Type-1 line uses `split(" ", 8)` so spaces in the filename + are preserved by collapsing everything from index 8 onward into + the path token. 
Regression guard for this parsing choice.""" + s = git_ops._parse_status_porcelain_v2(STATUS_SPACE_IN_FILENAME) + assert s["modified"] == ["docs/My Cool File.md"] + assert s["clean"] is False + + def test_detached_head_normalised_to_HEAD(self) -> None: + """Porcelain v2 emits "(detached)"; we normalise to "HEAD" so + cross-checks with git_current_branch (which always returns + "HEAD" for detached) agree on the same underlying state.""" + s = git_ops._parse_status_porcelain_v2(STATUS_DETACHED) + assert s["branch"] == "HEAD" + + def test_bad_branch_ab_format_falls_back_to_zero(self) -> None: + """Sign-validated parsing rejects malformed branch.ab lines + (negative ahead, positive behind) — leaves ahead/behind at 0 + rather than propagating bogus values into the MCP layer.""" + s = git_ops._parse_status_porcelain_v2(STATUS_BAD_AB) + assert s["ahead"] == 0 + assert s["behind"] == 0 + + @pytest.mark.parametrize( + "ab_line", + [ + "# branch.ab +-3 -1", # double-sign smuggling: int("-3") = -3 + "# branch.ab +3 --1", # double-sign smuggling on behind + "# branch.ab ++3 -1", # not all-digits after sign + "# branch.ab +5 -junk", # half-parseable; non-atomic try would + # have left ahead=5, behind=0 + "# branch.ab + -", # empty after sign + ], + ) + def test_branch_ab_double_sign_or_junk_rejected(self, ab_line: str) -> None: + """Round-5 regression: a startswith-only check accepted '+-3' + (parsed as -3) and the non-atomic try/except let '+5 -junk' + update ahead while leaving behind at default. Strict isdigit() + validation + commit-only-on-full-success closes both.""" + text = ( + "# branch.oid 0a1b2c3d4e5f\n" + "# branch.head main\n" + f"{ab_line}\n" + ) + s = git_ops._parse_status_porcelain_v2(text) + # Both fields stay at defaults rather than picking up bogus values. 
+ assert s["ahead"] == 0 + assert s["behind"] == 0 + + def test_corrupt_xy_field_skipped(self) -> None: + """Type-1 line with single-char XY (corrupted output) skipped + defensively rather than raising IndexError on `xy[1]`.""" + s = git_ops._parse_status_porcelain_v2(STATUS_CORRUPT_XY) + assert s["staged"] == [] + assert s["modified"] == [] + + def test_unmerged_xy_width_check_parity(self) -> None: + """Type-u (unmerged) parser must also validate XY width — same + defensive shape as type-1 / type-2. Round-3 added the check + for those two paths but missed unmerged.""" + # Single-char XY on a u-line. Token count >= 11 but xy is + # corrupted; the parser should skip rather than append. + text = ( + "# branch.oid abc\n" + "# branch.head main\n" + "u U N... 100644 100644 100644 100644 h1 h2 h3 corrupt_u.py\n" + ) + s = git_ops._parse_status_porcelain_v2(text) + assert s["unmerged"] == [] + + def test_question_mark_prefix_only_skipped(self) -> None: + """A literal `'? '` line (prefix with no path body) must NOT + append an empty string to untracked — would flip clean=False + with a phantom entry.""" + text = ( + "# branch.oid abc\n" + "# branch.head main\n" + "? \n" + "? real_untracked.py\n" + ) + s = git_ops._parse_status_porcelain_v2(text) + assert s["untracked"] == ["real_untracked.py"] + assert s["clean"] is False # the one real untracked still counts + + def test_unborn_branch_normalised_to_HEAD(self) -> None: + """branch.oid (initial) signals unborn state. 
Normalising to + "HEAD" gives consistent "weird state" signaling alongside the + detached-HEAD convention — git_current_branch raises on the + same repo, so the two functions agree they can't give you a + regular branch name.""" + s = git_ops._parse_status_porcelain_v2(STATUS_UNBORN) + assert s["branch"] == "HEAD" + assert s["untracked"] == ["README.md"] + + +class TestGitStatusDriver: + def test_subprocess_shape(self) -> None: + runner = _RecordingRunner([(0, STATUS_CLEAN, "")]) + git_ops.git_status(runner=runner) + # The porcelain=v2 + branch + untracked-files=normal flags are + # the contract. A regression to porcelain v1 would change every + # field we parse without breaking the canned-output tests above + # — assert the args explicitly. `-c color.ui=never` prepended by + # the _run_git wrapper. + assert runner.calls[0]["args"] == [ + "git", + "-c", + "color.ui=never", + "status", + "--porcelain=v2", + "--branch", + "--untracked-files=normal", + ] + + def test_passes_cwd_when_path_given(self) -> None: + runner = _RecordingRunner([(0, STATUS_CLEAN, "")]) + git_ops.git_status(path="/work/dir", runner=runner) + assert runner.calls[0]["kwargs"]["cwd"] == "/work/dir" + + def test_end_to_end_clean(self) -> None: + runner = _RecordingRunner([(0, STATUS_CLEAN, "")]) + s = git_ops.git_status(runner=runner) + assert s["clean"] is True + assert s["branch"] == "main" + + def test_end_to_end_dirty(self) -> None: + runner = _RecordingRunner([(0, STATUS_MIXED_CHANGES, "")]) + s = git_ops.git_status(runner=runner) + assert s["clean"] is False + assert len(s["staged"]) == 2 + assert len(s["untracked"]) == 2 + + +# =========================================================================== +# git_recent_commits +# =========================================================================== + + +# Build realistic log output using the same U+001F separator git_ops uses. 
+_SEP = "\x1f" + + +def _log_line(sha: str, short: str, subj: str, author: str, date: str) -> str: + return _SEP.join([sha, short, subj, author, date]) + + +LOG_THREE_COMMITS = "\n".join( + [ + _log_line( + "a" * 40, + "aaaaaaa", + "Add widget endpoint", + "Alice Garcia", + "2026-05-26T12:00:00-07:00", + ), + _log_line( + "b" * 40, + "bbbbbbb", + "Fix pagination off-by-one", + "Bob Jones", + "2026-05-25T15:30:00-07:00", + ), + _log_line( + "c" * 40, + "ccccccc", + "Refactor: rename helper", + "Alice Garcia", + "2026-05-25T10:00:00-07:00", + ), + ] +) + + +class TestGitRecentCommits: + def test_subprocess_shape(self) -> None: + runner = _RecordingRunner([(0, LOG_THREE_COMMITS, "")]) + git_ops.git_recent_commits(count=3, runner=runner) + args = runner.calls[0]["args"] + # `git -c color.ui=never log ...` — the wrapper prepends the + # color-suppression even though log doesn't strictly need it + # in our format, for uniformity. + assert args[0] == "git" + assert args[1] == "-c" + assert args[2] == "color.ui=never" + assert args[3] == "log" + # The pretty-format string uses U+001F (Unit Separator) so no + # subject line / author / date can contain the delimiter — a + # subject like "fix: split a|b|c" would otherwise break a + # pipe-delimited format. Pin the format. + pretty_arg = args[4] + assert pretty_arg.startswith("--pretty=format:") + assert "%H" in pretty_arg + assert "%h" in pretty_arg + assert "%s" in pretty_arg + assert "%an" in pretty_arg + assert "%aI" in pretty_arg # ISO 8601 strict + assert _SEP in pretty_arg + # `-n<count>` shape, not separate args + assert args[5] == "-n3" + assert args[6] == "HEAD" + # `--` terminator separates options from positional refs (defense + # against an agent-supplied ref that starts with `-`). 
+ assert args[7] == "--" + + def test_passes_cwd_when_path_given(self) -> None: + runner = _RecordingRunner([(0, LOG_THREE_COMMITS, "")]) + git_ops.git_recent_commits(path="/work/dir", runner=runner) + assert runner.calls[0]["kwargs"]["cwd"] == "/work/dir" + + def test_parses_log_output(self) -> None: + runner = _RecordingRunner([(0, LOG_THREE_COMMITS, "")]) + commits = git_ops.git_recent_commits(count=3, runner=runner) + assert len(commits) == 3 + assert commits[0] == { + "sha": "a" * 40, + "short": "aaaaaaa", + "subject": "Add widget endpoint", + "author": "Alice Garcia", + "date": "2026-05-26T12:00:00-07:00", + } + # Author with multi-word name preserved. + assert commits[1]["author"] == "Bob Jones" + # Subject containing colon preserved. + assert commits[2]["subject"] == "Refactor: rename helper" + + def test_alternate_ref(self) -> None: + runner = _RecordingRunner([(0, LOG_THREE_COMMITS, "")]) + git_ops.git_recent_commits(count=5, ref="origin/main", runner=runner) + # ref is at index 6 now (after git -c color.ui=never log + # --pretty=... -n5). + assert runner.calls[0]["args"][6] == "origin/main" + + def test_empty_output_returns_empty_list(self) -> None: + # If git ever returns exit 0 + empty stdout, return [] not raise. + # (Note: a real git on an unborn-branch repo exits 128 — see + # test_unborn_branch_raises_giterror below.) + runner = _RecordingRunner([(0, "", "")]) + assert git_ops.git_recent_commits(runner=runner) == [] + + def test_unborn_branch_raises_giterror(self) -> None: + # Real-world behaviour: `git log` on a freshly `git init`'d + # repo with no commits exits 128 (`fatal: your current branch + # 'main' does not have any commits yet`). Pin that this + # surfaces as a GitOpError rather than silently returning []. 
+ runner = _RecordingRunner( + [ + ( + 128, + "", + "fatal: your current branch 'main' does not have any commits yet\n", + ) + ] + ) + with pytest.raises(GitOpError, match="does not have any commits"): + git_ops.git_recent_commits(runner=runner) + + @pytest.mark.parametrize("bad", [0, -1, True, False, "ten"]) + def test_rejects_invalid_count(self, bad: Any) -> None: + runner = _RecordingRunner([]) + with pytest.raises(ValueError, match="count"): + git_ops.git_recent_commits(count=bad, runner=runner) + # No subprocess invocation. + assert runner.calls == [] + + @pytest.mark.parametrize("bad", ["", " "]) + def test_rejects_empty_ref(self, bad: str) -> None: + runner = _RecordingRunner([]) + with pytest.raises(ValueError, match="ref"): + git_ops.git_recent_commits(ref=bad, runner=runner) + assert runner.calls == [] + + @pytest.mark.parametrize( + "bad_ref", + ["--all", "-h", "--format=junk", " --pretty=blah"], + ) + def test_rejects_ref_starting_with_dash(self, bad_ref: str) -> None: + """An agent-supplied ref like '--all' would otherwise be parsed + by git as an option flag, not a ref — silently changing the + meaning of the call. Reject at the boundary; the `--` terminator + pinned in test_subprocess_shape is the structural backstop.""" + runner = _RecordingRunner([]) + with pytest.raises(ValueError, match="must not start with '-'"): + git_ops.git_recent_commits(ref=bad_ref, runner=runner) + assert runner.calls == [] # no subprocess invocation + + def test_rejects_count_above_cap(self) -> None: + runner = _RecordingRunner([]) + with pytest.raises(ValueError, match="<="): + git_ops.git_recent_commits(count=10_001, runner=runner) + assert runner.calls == [] + + def test_count_at_cap_accepted(self) -> None: + # Verify the cap is inclusive (count == cap should pass), not + # off-by-one. The cap is git_ops._MAX_LOG_COUNT. `-n` arg is at + # index 5 after the prepended `git -c color.ui=never log + # --pretty=...` tokens. 
+ runner = _RecordingRunner([(0, "", "")]) + git_ops.git_recent_commits(count=git_ops._MAX_LOG_COUNT, runner=runner) + assert runner.calls[0]["args"][5] == f"-n{git_ops._MAX_LOG_COUNT}" + + def test_subject_containing_separator_drops_malformed_line(self) -> None: + # The U+001F separator should not appear in commit subjects in + # practice, but if a malformed line shows up we skip it rather + # than crash. Verify the parser is defensive. + text = ( + _log_line("a" * 40, "aaaaaaa", "good", "Alice", "2026-05-26T12:00:00Z") + + "\n" + + "junk\x1fline\x1fwith\x1fonly\x1f4\x1fparts\x1ftoo many" # 7 fields + ) + runner = _RecordingRunner([(0, text, "")]) + commits = git_ops.git_recent_commits(runner=runner) + # Only the well-formed line yielded. + assert len(commits) == 1 + assert commits[0]["subject"] == "good" + + def test_pure_separator_line_skipped(self) -> None: + """A line of pure separators ("\\x1f\\x1f\\x1f\\x1f") splits + into exactly 5 empty strings, passing the parts-count guard. + Without the `if not sha: continue` check we'd append a + degenerate {"sha":"", "short":"", ...} entry.""" + text = "\x1f\x1f\x1f\x1f" # 4 separators -> 5 empty fields + runner = _RecordingRunner([(0, text, "")]) + assert git_ops.git_recent_commits(runner=runner) == [] + + def test_subject_with_carriage_return_preserved(self) -> None: + """A commit subject containing \\r (legal in git; happens when + an author commits with `git commit -F` from a Windows-line-ended + file) must NOT fragment into two "lines" under splitlines(), + which would silently drop the entire commit.""" + text = _log_line( + "a" * 40, + "aaaaaaa", + "subject with\rcarriage return", + "Alice", + "2026-05-26T12:00:00Z", + ) + runner = _RecordingRunner([(0, text, "")]) + commits = git_ops.git_recent_commits(runner=runner) + assert len(commits) == 1 + assert commits[0]["subject"] == "subject with\rcarriage return" + + +# =========================================================================== +# 
git_uncommitted_changes +# =========================================================================== + + +class TestGitUncommittedChanges: + def test_three_subprocess_calls(self) -> None: + runner = _RecordingRunner( + [ + (0, "diff --staged\n", ""), + (0, "diff --working\n", ""), + (0, "untracked1.py\nuntracked2.md\n", ""), + ] + ) + result = git_ops.git_uncommitted_changes(runner=runner) + # Verify the EXACT command shape for all three calls. A regression + # to a single `git status -s` would lose the diff content; pinning + # each command separately catches that. + assert runner.calls[0]["args"] == [ + "git", + "-c", + "color.ui=never", + "diff", + "--cached", + ] + assert runner.calls[1]["args"] == [ + "git", + "-c", + "color.ui=never", + "diff", + ] + assert runner.calls[2]["args"] == [ + "git", + "-c", + "color.ui=never", + "ls-files", + "--others", + "--exclude-standard", + ] + assert result == { + "staged_diff": "diff --staged\n", + "working_diff": "diff --working\n", + "untracked_files": ["untracked1.py", "untracked2.md"], + "untracked_files_omitted": 0, + } + + def test_oversize_diff_truncated_with_marker(self) -> None: + """A multi-MiB diff (someone accidentally staged a generated + blob) must NOT be returned in full — would OOM the MCP server. + Truncation marker tells the caller what happened.""" + big_diff = "+" + ("x" * (git_ops._MAX_DIFF_BYTES + 100)) + runner = _RecordingRunner( + [ + (0, big_diff, ""), + (0, "", ""), + (0, "", ""), + ] + ) + result = git_ops.git_uncommitted_changes(runner=runner) + # The returned staged_diff is at most _MAX_DIFF_BYTES plus the + # truncation marker length. + assert len(result["staged_diff"]) <= ( + git_ops._MAX_DIFF_BYTES + len(git_ops._DIFF_TRUNCATION_MARKER) + ) + assert "truncated by bb MCP server" in result["staged_diff"] + # Below-cap diffs are returned verbatim. 
+ small_runner = _RecordingRunner( + [ + (0, "diff body\n", ""), + (0, "", ""), + (0, "", ""), + ] + ) + small = git_ops.git_uncommitted_changes(runner=small_runner) + assert small["staged_diff"] == "diff body\n" + + def test_oversize_non_ascii_diff_byte_capped(self) -> None: + """Regression guard for round-4 finding: an earlier fast path + used `len(text) <= cap` which is wrong for non-ASCII content + (UTF-8 bytes >= chars). A 600K-emoji string was 600K chars + (under the 1 MiB char cap) but encoded to 2.4 MiB. The fixed + fast path is gated on `text.isascii()`.""" + # 600_000 4-byte emoji ~= 2.4 MiB encoded, well over 1 MiB cap. + big_emoji_diff = "+" + ("\U0001F600" * 600_000) + # Sanity: this fixture must actually exceed the byte cap, otherwise + # the regression isn't being exercised. + assert ( + len(big_emoji_diff.encode("utf-8")) > git_ops._MAX_DIFF_BYTES + ) + runner = _RecordingRunner( + [ + (0, big_emoji_diff, ""), + (0, "", ""), + (0, "", ""), + ] + ) + result = git_ops.git_uncommitted_changes(runner=runner) + # Returned diff is byte-capped, not char-capped. + assert len(result["staged_diff"].encode("utf-8")) <= ( + git_ops._MAX_DIFF_BYTES + len(git_ops._DIFF_TRUNCATION_MARKER.encode("utf-8")) + ) + assert "truncated by bb MCP server" in result["staged_diff"] + + def test_oversize_untracked_list_capped(self) -> None: + """A repo that forgot to gitignore node_modules / .venv could + return hundreds of thousands of untracked paths. Cap at + _MAX_PATH_LIST so the MCP server doesn't OOM on the JSON + serialisation. Omitted count surfaces in a sibling field so + callers iterating the list see only real paths.""" + # Produce more paths than the cap allows. 
+ many_paths = "\n".join(f"file{i:06}.tmp" for i in range(15_000)) + runner = _RecordingRunner( + [ + (0, "", ""), # staged_diff + (0, "", ""), # working_diff + (0, many_paths, ""), # untracked + ] + ) + result = git_ops.git_uncommitted_changes(runner=runner) + # Returned list is capped to exactly _MAX_PATH_LIST entries — + # no in-list sentinel, so callers iterating with os.stat / + # Path.exists don't hit a non-path string. + assert len(result["untracked_files"]) == git_ops._MAX_PATH_LIST + # Every entry is a real-shaped path. + assert all(p.startswith("file") for p in result["untracked_files"]) + # Omitted count surfaces in a sibling field. + assert result["untracked_files_omitted"] == 5_000 + + def test_untracked_omitted_zero_when_under_cap(self) -> None: + """Sibling field is 0 (not missing) when no truncation happened. + Lets agents check a single field unconditionally rather than + defending against KeyError.""" + runner = _RecordingRunner( + [ + (0, "", ""), + (0, "", ""), + (0, "one.py\ntwo.py\n", ""), + ] + ) + result = git_ops.git_uncommitted_changes(runner=runner) + assert result["untracked_files"] == ["one.py", "two.py"] + assert result["untracked_files_omitted"] == 0 + + +class TestPathListCap: + """Unit tests for the _truncated_path_list cap helper and for the + status parser's use of it. NOTE: the three trailing tests in this + class exercise git_uncommitted_changes and belong with its tests.""" + + def test_under_cap_returns_verbatim_with_zero_omitted(self) -> None: + paths = [f"f{i}" for i in range(100)] + capped, omitted = git_ops._truncated_path_list(paths) + assert capped == paths + assert omitted == 0 + + def test_at_cap_returns_verbatim_with_zero_omitted(self) -> None: + paths = [f"f{i}" for i in range(git_ops._MAX_PATH_LIST)] + capped, omitted = git_ops._truncated_path_list(paths) + assert capped == paths + assert omitted == 0 + + def test_one_over_cap_reports_one_omitted(self) -> None: + paths = [f"f{i}" for i in range(git_ops._MAX_PATH_LIST + 1)] + capped, 
omitted = git_ops._truncated_path_list(paths) + # The returned list is EXACTLY _MAX_PATH_LIST entries — no + # sentinel. Callers iterating with os.stat won't trip. + assert len(capped) == git_ops._MAX_PATH_LIST + assert omitted == 1 + # Every entry is a real path (no "<..." marker). + assert all(p.startswith("f") for p in capped) + + def test_many_over_cap_reports_full_omitted_count(self) -> None: + paths = [f"f{i}" for i in range(git_ops._MAX_PATH_LIST + 50)] + capped, omitted = git_ops._truncated_path_list(paths) + assert len(capped) == git_ops._MAX_PATH_LIST + assert omitted == 50 + + def test_git_status_caps_file_lists_with_omitted_siblings(self) -> None: + """Same cap applies inside _parse_status_porcelain_v2 — staged / + modified / untracked / unmerged lists are bounded, and each has + a sibling `<key>_omitted` field surfacing the dropped count.""" + many_untracked = "\n".join( + f"? file{i:05}.tmp" for i in range(12_000) + ) + header = "# branch.oid abc\n# branch.head main\n" + status_text = header + many_untracked + s = git_ops._parse_status_porcelain_v2(status_text) + assert len(s["untracked"]) == git_ops._MAX_PATH_LIST + assert s["untracked_omitted"] == 2_000 + # Other lists were empty, so their omitted siblings are 0. 
+ assert s["staged_omitted"] == 0 + assert s["modified_omitted"] == 0 + assert s["unmerged_omitted"] == 0 + + def test_clean_tree_returns_empties(self) -> None: + runner = _RecordingRunner( + [ + (0, "", ""), # no staged diff + (0, "", ""), # no working diff + (0, "", ""), # no untracked + ] + ) + assert git_ops.git_uncommitted_changes(runner=runner) == { + "staged_diff": "", + "working_diff": "", + "untracked_files": [], + "untracked_files_omitted": 0, + } + + def test_propagates_giterror_on_failure(self) -> None: + runner = _RecordingRunner( + [(128, "", "fatal: not a git repository\n")] + ) + with pytest.raises(GitOpError, match="not a git repository"): + git_ops.git_uncommitted_changes(runner=runner) + + def test_passes_cwd(self) -> None: + runner = _RecordingRunner( + [ + (0, "", ""), + (0, "", ""), + (0, "", ""), + ] + ) + git_ops.git_uncommitted_changes(path="/work/dir", runner=runner) + for call in runner.calls: + assert call["kwargs"]["cwd"] == "/work/dir" diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py new file mode 100644 index 0000000..8a6c0b7 --- /dev/null +++ b/tests/test_mcp_server.py @@ -0,0 +1,1052 @@ +""" +Tests for mcp_server. + +Discipline: + - BB_MCP_SKIP_BOOTSTRAP=1 is set in conftest.py BEFORE this module + imports, so importing mcp_server skips the venv bootstrap and uses + the FastMCP stub (decorator returns the function unchanged). + - Tools are tested by patching bb_ops / git_ops functions at the + module level, calling the tool directly, and asserting (a) the + underlying function was called with the right arguments, (b) the + success-path result shape, and (c) the error-path shape for each + expected exception kind. + - No live HTTP / subprocess: bb_ops / git_ops have their own tests + for that. This file pins the WIRING — the layer that decides which + bb_ops function to call and how to shape the response dict. + +All fixtures are fictional (acme / widget-service / alice / bob). 
+""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import patch + +import pytest + +import bb_api +import bb_ops +import git_ops +import mcp_server + + +# --------------------------------------------------------------------------- +# Shared fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _reset_state(monkeypatch: pytest.MonkeyPatch) -> None: + """Reset the module-level client cache + default cwd between tests so + one test's state doesn't leak into another's. + + Also scrubs the BB_* auth env vars so a developer running pytest + locally with `BB_USER=...` exported in their shell doesn't accidentally + let `bb_api.load_config()` pick up their real config in any test that + reaches `_get_client()` without the `stub_client` fixture. Without + this scrub, a future test could pass locally and fail in CI (or + vice versa) based on developer env, not code state.""" + mcp_server._reset_client_cache() + # Default BB_DEFAULT_REPO_PATH to a stable value so tests can assert on it. + monkeypatch.setenv("BB_DEFAULT_REPO_PATH", "/test/cwd") + # Scrub ambient BB config so the suite is hermetic against dev env. + for k in ("BB_USER", "BB_TOKEN", "BB_WORKSPACE", "BB_API_BASE"): + monkeypatch.delenv(k, raising=False) + + +@pytest.fixture +def stub_client(monkeypatch: pytest.MonkeyPatch) -> bb_api.BBClient: + """Inject a stub client into the cache so tools don't try to read + real config files. Returns the client so tests can inspect it.""" + cfg = bb_api.BBConfig( + user="alice@example.com", + token="tok-xyz", + workspace="acme", + api_base=bb_api.DEFAULT_API_BASE, + ) + # We don't need a real opener — tests patch bb_ops/git_ops functions + # before they're called, so the client's HTTP layer is never invoked. 
+ client = bb_api.BBClient(cfg) + monkeypatch.setattr(mcp_server, "_client_cache", client) + return client + + +# --------------------------------------------------------------------------- +# Tool registry +# --------------------------------------------------------------------------- + + +# Every tool the agent expects. If we add or remove a tool, this list +# updates and the count assertion below catches an accidental drop. +EXPECTED_TOOLS = { + # Pipelines + "pipelines_list", + "pipeline_show", + "pipeline_steps", + "pipeline_trigger", + "pipeline_stop", + "pipeline_logs", + # PRs + "prs_list", + "pr_show", + "pr_activity", + "pr_create", + "pr_approve", + "pr_unapprove", + "pr_merge", + "pr_decline", + "pr_diff", + "pr_comments_list", + "pr_comment_add", + # Repos / branches / metadata + "repos_list", + "repo_show", + "branches_list", + "branch_show", + "vars_list", + "downloads_list", + "commits_list", + # Git context + "git_current_branch", + "git_status", + "git_remote_repo", + "git_recent_commits", + "git_uncommitted_changes", + # Meta + "whoami", +} + + +def test_all_expected_tools_registered() -> None: + """The FastMCP stub records every @mcp.tool()-decorated function in + its _tools dict. Pin the exact set so an accidental rename or drop + is caught by the suite — the agent depends on these exact names.""" + registered = set(mcp_server.mcp._tools.keys()) + assert registered == EXPECTED_TOOLS, ( + f"Tool set drift. Missing: {EXPECTED_TOOLS - registered}. " + f"Extra: {registered - EXPECTED_TOOLS}." + ) + + +def test_tool_count_matches_expectation() -> None: + """Independent sanity check — pin the exact number so a silent + regression that drops a registration is visible. 
30 = 6 pipelines + + 11 PRs + 7 repos/metadata + 5 git context + 1 meta.""" + assert len(mcp_server.mcp._tools) == 30 + + +# --------------------------------------------------------------------------- +# _resolve_repo +# --------------------------------------------------------------------------- + + +class TestResolveRepo: + def test_empty_repo_uses_git_remote( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + # `""` triggers git_remote_repo auto-detect from BB_DEFAULT_REPO_PATH. + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("from-remote", "widget-service"), + ) + client, ws, slug = mcp_server._resolve_repo("") + assert client is stub_client + assert ws == "from-remote" + assert slug == "widget-service" + + def test_bare_slug_uses_config_workspace( + self, stub_client: bb_api.BBClient + ) -> None: + # No slash → workspace defaults to client.config.workspace. + client, ws, slug = mcp_server._resolve_repo("my-repo") + assert ws == "acme" # from BBConfig + assert slug == "my-repo" + + def test_workspace_slash_repo_overrides( + self, stub_client: bb_api.BBClient + ) -> None: + # "ws/repo" overrides the configured workspace. 
+ client, ws, slug = mcp_server._resolve_repo("other/cool-repo") + assert ws == "other" + assert slug == "cool-repo" + + @pytest.mark.parametrize( + "bad", + ["a/b/c", "/repo", "ws/", "/", "//"], + ) + def test_malformed_repo_raises_value_error( + self, stub_client: bb_api.BBClient, bad: str + ) -> None: + with pytest.raises(ValueError, match="repo must be"): + mcp_server._resolve_repo(bad) + + def test_strips_whitespace_before_parsing( + self, stub_client: bb_api.BBClient + ) -> None: + """A sloppy paste like ' acme/widget ' must not slip through + as workspace=' acme' and surface as a deep API failure.""" + client, ws, slug = mcp_server._resolve_repo(" acme/widget ") + assert ws == "acme" + assert slug == "widget" + + def test_whitespace_only_triggers_autodetect( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Whitespace-only repo must not slip through; it should trip + the same auto-detect path as the empty string.""" + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("from-remote", "ws"), + ) + _, ws, slug = mcp_server._resolve_repo(" ") + assert ws == "from-remote" + + def test_none_treated_as_empty( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """JSON `null` from the MCP client deserialises to None. 
+ Without normalisation, .strip() would crash uncaught with + AttributeError.""" + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("from-remote", "ws"), + ) + _, ws, _ = mcp_server._resolve_repo(None) + assert ws == "from-remote" + + def test_inner_slug_parts_stripped( + self, stub_client: bb_api.BBClient + ) -> None: + """'acme/ widget' (whitespace on the slug-half after split) + must not slip through as ws='acme', slug=' widget' and 404 + on `/repositories/acme/%20widget`.""" + client, ws, slug = mcp_server._resolve_repo("acme/ widget") + assert ws == "acme" + assert slug == "widget" + + def test_repo_validated_before_get_client( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """A fresh-machine user without config + a malformed slug + should see the ValueError (real cause) not BBConfigError + (masking failure).""" + mcp_server._reset_client_cache() + def raise_config(*_args: Any, **_kwargs: Any) -> Any: + raise bb_api.BBConfigError("Missing BB_USER") + monkeypatch.setattr(bb_api, "load_config", raise_config) + # Malformed repo: three parts. Should raise ValueError, NOT + # BBConfigError — proves the shape check runs before + # _get_client(). + with pytest.raises(ValueError, match="repo must be"): + mcp_server._resolve_repo("a/b/c") + + @pytest.mark.parametrize("bare", [".", ".."]) + def test_bare_dot_slug_validated_before_get_client( + self, monkeypatch: pytest.MonkeyPatch, bare: str + ) -> None: + """Round-3 finding: the bare-slug fallback bypassed the same + validation the slash-containing branch got. 
A fresh-machine + user with `repo='.'` should see the actual ValueError, not + BBConfigError masking it.""" + mcp_server._reset_client_cache() + def raise_config(*_args: Any, **_kwargs: Any) -> Any: + raise bb_api.BBConfigError("Missing BB_USER") + monkeypatch.setattr(bb_api, "load_config", raise_config) + with pytest.raises(ValueError, match=r"'\.'"): + mcp_server._resolve_repo(bare) + + +# --------------------------------------------------------------------------- +# _error_dict +# --------------------------------------------------------------------------- + + +class TestErrorDict: + def test_bbapierror_carries_status_url_body(self) -> None: + e = bb_api.BBApiError(404, "https://x/y", '{"error":{"message":"nope"}}') + d = mcp_server._error_dict(e) + assert d["ok"] is False + assert d["kind"] == "BBApiError" + assert d["status"] == 404 + assert d["url"] == "https://x/y" + assert "nope" in d["body"] + + def test_bbapierror_redacts_signed_s3_url(self) -> None: + """Round-2/3/4 SECURITY findings: pipeline_logs / pr_diff follow + Bitbucket's 307 to a signed S3 URL. If S3 then returns non-3xx + (clock skew, expired, network hiccup), BBApiError carries the + signed URL with AWS credentials in the query string. The agent + error dict must NOT propagate it through ANY field. + + Round 2 fixed `url`; round 3 found `message` still leaked; + round 4 found `body` still leaked. Pin EVERY string field that + could carry the URL so a future regression can't add a new + unredacted field without the test catching it.""" + signed = ( + "https://bbuseruploads.s3.amazonaws.com/path/to/log?" + "X-Amz-Signature=abcd1234supersecret&X-Amz-Credential=AKIAEXAMPLE" + "&Expires=12345" + ) + # Realistic API body that echoes the upstream URL — typical for + # nginx / proxy-layered error pages. 
+ body_with_url = ( + f"<html>Bad gateway: upstream {signed} failed: connection reset</html>" + ) + e = bb_api.BBApiError(403, signed, body_with_url) + d = mcp_server._error_dict(e) + # EVERY string field free of every secret bit. + for field in ("url", "message", "body"): + assert "abcd1234supersecret" not in d[field], ( + f"signature leaked through {field}: {d[field]!r}" + ) + assert "AKIAEXAMPLE" not in d[field], ( + f"AWS access key leaked through {field}: {d[field]!r}" + ) + # Path part preserved in url so the agent knows what host was called. + assert "bbuseruploads.s3.amazonaws.com" in d["url"] + assert "redacted-signed-url-params" in d["url"] + + def test_bbapierror_redacts_embedded_creds(self) -> None: + # Body field can also carry the credentialed URL (e.g. + # `curl` showing the failing URL back in its error output). + e = bb_api.BBApiError( + 401, + "https://user:supersecret@api.bitbucket.org/2.0/foo", + "Auth failed for https://user:supersecret@api.bitbucket.org/2.0/foo", + ) + d = mcp_server._error_dict(e) + for field in ("url", "message", "body"): + assert "supersecret" not in d[field], ( + f"credential leaked through {field}: {d[field]!r}" + ) + assert "[redacted]" in d["url"] + assert "[redacted]" in d["message"] + assert "[redacted]" in d["body"] + + def test_giterror_stderr_redacted(self) -> None: + """Phase 4.7+ will add git wrappers that touch remote repos + (fetch / push / ls-remote). Their stderr commonly contains + `fatal: unable to access 'https://x-token-auth:TOKEN@bb.org/...'`. 
+ _error_dict must redact stderr the same way it redacts message + and body.""" + e = git_ops.GitOpError( + ["git", "fetch"], + 128, + "fatal: unable to access 'https://x-token-auth:SECRETTOKEN@bitbucket.org/foo/bar.git/': The requested URL returned error: 401", + ) + d = mcp_server._error_dict(e) + for field in ("message", "stderr"): + assert "SECRETTOKEN" not in d[field], ( + f"git token leaked through {field}: {d[field]!r}" + ) + assert "x-token-auth" not in d[field], ( + f"git username leaked through {field}: {d[field]!r}" + ) + assert "[redacted]" in d["stderr"] + + def test_signed_url_indicators_case_insensitive(self) -> None: + """Round-3 finding: MinIO / R2 / Backblaze / mixed-case AWS + variants may use different capitalisations of the signature + param. Match case-insensitively.""" + # Lowercase variant. + e1 = bb_api.BBApiError( + 403, + "https://example.com/log?x-amz-signature=secret123", + "AccessDenied", + ) + d1 = mcp_server._error_dict(e1) + assert "secret123" not in d1["url"] + assert "secret123" not in d1["message"] + + # Plain `Signature=` (used by some non-AWS S3-compatible + # services). + e2 = bb_api.BBApiError( + 403, + "https://r2.example.com/log?Signature=secret456", + "Forbidden", + ) + d2 = mcp_server._error_dict(e2) + assert "secret456" not in d2["url"] + assert "secret456" not in d2["message"] + + def test_redacts_azure_sas_url(self) -> None: + """Round-4 finding: Azure Blob SAS URLs use ?sv=...&sig=...&se=... + (no `signature=` suffix). 
The substring check must catch the + `sig=` short form.""" + sas = "https://acct.blob.core.windows.net/c/blob?sv=2020&sig=ABCSECRET&se=2026" + e = bb_api.BBApiError(403, sas, "AuthorizationFailed") + d = mcp_server._error_dict(e) + for field in ("url", "message"): + assert "ABCSECRET" not in d[field] + + def test_redacts_bearer_token_in_url(self) -> None: + """Bearer tokens in query string (`?access_token=...` / + `?api_key=...`) also redacted.""" + for param in ("access_token", "api_key"): + e = bb_api.BBApiError( + 401, + f"https://api.example.com/endpoint?{param}=SECRET_BEARER", + "Unauthorized", + ) + d = mcp_server._error_dict(e) + assert "SECRET_BEARER" not in d["url"] + assert "SECRET_BEARER" not in d["message"] + + def test_safe_text_redacts_ssh_url_in_free_text(self) -> None: + """Round-4 finding: _redact_message only matched http(s)://. + ssh:// URLs with embedded passphrases (and other schemes) must + also be caught. Validates the broadened _safe_text helper.""" + # Construct an error message that embeds an ssh:// URL with auth. + text = "Could not read from remote repository ssh://x-token:SSHPASS@bb.org/foo.git: connection refused" + redacted = mcp_server._safe_text(text) + assert "SSHPASS" not in redacted + assert "x-token" not in redacted + + def test_bbopnotfound_kind(self) -> None: + e = bb_ops.BBOpNotFound("pipeline #42 not found") + d = mcp_server._error_dict(e) + assert d["ok"] is False + assert d["kind"] == "BBOpNotFound" + # No HTTP fields — distinct from BBApiError. 
+ assert "status" not in d + + def test_giterror_carries_returncode_stderr(self) -> None: + e = git_ops.GitOpError(["git", "status"], 128, "fatal: not a git repo") + d = mcp_server._error_dict(e) + assert d["kind"] == "GitOpError" + assert d["returncode"] == 128 + assert "not a git repo" in d["stderr"] + + def test_value_error_kind(self) -> None: + e = ValueError("bad input") + d = mcp_server._error_dict(e) + assert d["kind"] == "ValueError" + assert d["message"] == "bad input" + + +# --------------------------------------------------------------------------- +# Per-tool wiring tests +# --------------------------------------------------------------------------- +# +# Each pipeline / PR / repo tool dispatches to bb_ops.<func>(...). We patch +# the bb_ops function to a recorder, call the tool, then assert (a) what +# the tool passed to bb_ops, and (b) the response-dict shape. +# +# These tests deliberately do NOT exercise bb_ops's own logic — that's +# covered comprehensively in test_bb_ops_*.py. This file pins the WIRING. + + +def _recorder(return_value: Any) -> Any: + """Build a stub that records its calls and returns a fixed value.""" + calls: list[tuple[tuple[Any, ...], dict[str, Any]]] = [] + + def fn(*args: Any, **kwargs: Any) -> Any: + calls.append((args, kwargs)) + return return_value + + fn.calls = calls # type: ignore[attr-defined] + return fn + + +class TestPipelineTools: + def test_pipelines_list_dispatches_and_shapes( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder([{"build_number": 42}]) + with patch.object(bb_ops, "pipelines_list", recorder): + out = mcp_server.pipelines_list(repo="my-repo", count=5, branch="main") + # bb_ops.pipelines_list received the resolved (workspace, repo, count, branch, sort). 
+ assert recorder.calls[0][0] == (stub_client, "acme", "my-repo") + assert recorder.calls[0][1] == { + "count": 5, + "branch": "main", + "sort": "-created_on", # default + } + assert out == { + "ok": True, + "workspace": "acme", + "repo": "my-repo", + "pipelines": [{"build_number": 42}], + } + + def test_pipelines_list_sort_kwarg(self, stub_client: bb_api.BBClient) -> None: + """`sort=` lets the agent ask for oldest-first or sort-by-completion.""" + recorder = _recorder([]) + with patch.object(bb_ops, "pipelines_list", recorder): + mcp_server.pipelines_list(repo="my-repo", sort="created_on") + assert recorder.calls[0][1]["sort"] == "created_on" + + def test_pipelines_list_empty_branch_passes_none( + self, stub_client: bb_api.BBClient + ) -> None: + # Empty-string `branch` → None at the bb_ops boundary (so the + # `target.ref_name` query param is omitted, not sent as ""). + recorder = _recorder([]) + with patch.object(bb_ops, "pipelines_list", recorder): + mcp_server.pipelines_list(repo="my-repo") + assert recorder.calls[0][1]["branch"] is None + + def test_pipeline_show_wraps_bbopnotfound( + self, stub_client: bb_api.BBClient + ) -> None: + def raise_not_found(*_args: Any, **_kwargs: Any) -> Any: + raise bb_ops.BBOpNotFound("pipeline #999 not found") + + with patch.object(bb_ops, "pipeline_show", raise_not_found): + out = mcp_server.pipeline_show(number=999, repo="my-repo") + assert out["ok"] is False + assert out["kind"] == "BBOpNotFound" + assert "#999" in out["message"] + # Request identifier threaded into the error dict so an agent + # running parallel pipeline_show calls can correlate failures + # with originating requests. 
+ assert out["number"] == 999 + + def test_pipeline_show_wraps_bbapierror( + self, stub_client: bb_api.BBClient + ) -> None: + def raise_api(*_args: Any, **_kwargs: Any) -> Any: + raise bb_api.BBApiError(403, "https://x", '{"error":"forbidden"}') + + with patch.object(bb_ops, "pipeline_show", raise_api): + out = mcp_server.pipeline_show(number=42, repo="my-repo") + assert out["ok"] is False + assert out["status"] == 403 + assert "forbidden" in out["body"] + assert out["number"] == 42 + + def test_pipeline_trigger_empty_pattern_passes_none( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder({"build_number": 100}) + with patch.object(bb_ops, "pipeline_trigger", recorder): + mcp_server.pipeline_trigger(branch="main", repo="my-repo", pattern="") + # Empty pattern at MCP boundary becomes None at bb_ops boundary — + # matters because bb_ops treats None as "default pipeline" but + # would raise on empty string. + assert recorder.calls[0][1]["pattern"] is None + + @pytest.mark.parametrize("bad_branch", ["", " ", "\n\t"]) + def test_pipeline_trigger_rejects_empty_or_whitespace_branch( + self, stub_client: bb_api.BBClient, bad_branch: str + ) -> None: + """Round-3 finding: pipeline_trigger forwarded branch verbatim, + unlike pipelines_list / commits_list which funnel through + _opt_str. 
Whitespace-only branch would silently POST + target.ref_name=' ' and 4xx with an opaque body.""" + recorder = _recorder({}) + with patch.object(bb_ops, "pipeline_trigger", recorder): + out = mcp_server.pipeline_trigger(branch=bad_branch, repo="my-repo") + assert out["ok"] is False + assert out["kind"] == "ValueError" + assert recorder.calls == [] # bb_ops not reached + + def test_pipeline_trigger_strips_branch_whitespace( + self, stub_client: bb_api.BBClient + ) -> None: + """Trailing/leading whitespace on a real branch name gets + stripped so the API call uses the clean value.""" + recorder = _recorder({"build_number": 101}) + with patch.object(bb_ops, "pipeline_trigger", recorder): + mcp_server.pipeline_trigger(branch=" main ", repo="my-repo") + assert recorder.calls[0][1]["branch"] == "main" + + def test_pipeline_logs_returns_log_text( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder("+ echo hello\nhello\n") + with patch.object(bb_ops, "pipeline_logs", recorder): + out = mcp_server.pipeline_logs(number=42, step_index=0, repo="my-repo") + assert out["log"] == "+ echo hello\nhello\n" + assert out["step_index"] == 0 + # Default timeout passed through. 
+ assert recorder.calls[0][1]["timeout"] == 120.0 + + def test_pipeline_logs_custom_timeout( + self, stub_client: bb_api.BBClient + ) -> None: + """Agent can extend timeout for pipelines with huge log payloads.""" + recorder = _recorder("") + with patch.object(bb_ops, "pipeline_logs", recorder): + mcp_server.pipeline_logs( + number=42, step_index=0, repo="my-repo", timeout=600.0 + ) + assert recorder.calls[0][1]["timeout"] == 600.0 + + +class TestPullRequestTools: + def test_pr_create_auto_detects_source_branch( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """When source_branch is empty, pr_create auto-detects via + git_ops.git_current_branch — matches bash `bb pr-create`.""" + monkeypatch.setattr( + git_ops, "git_current_branch", + lambda path=None: "feat/auto-detected", + ) + recorder = _recorder({"id": 7}) + with patch.object(bb_ops, "pr_create", recorder): + mcp_server.pr_create(title="Hi", repo="my-repo") + # The resolved source_branch comes from git_current_branch. 
+ assert recorder.calls[0][1]["source_branch"] == "feat/auto-detected" + + def test_pr_create_explicit_source_branch_used_as_is( + self, + stub_client: bb_api.BBClient, + ) -> None: + recorder = _recorder({"id": 8}) + with patch.object(bb_ops, "pr_create", recorder): + mcp_server.pr_create( + title="Hi", + source_branch="feat/explicit", + repo="my-repo", + ) + assert recorder.calls[0][1]["source_branch"] == "feat/explicit" + + def test_pr_create_whitespace_source_branch_triggers_autodetect( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Whitespace-only source_branch should NOT slip through to + bb_ops (which would then raise) — the whitespace trips the + auto-detect path.""" + monkeypatch.setattr( + git_ops, "git_current_branch", + lambda path=None: "feat/detected", + ) + recorder = _recorder({"id": 9}) + with patch.object(bb_ops, "pr_create", recorder): + mcp_server.pr_create(title="Hi", source_branch=" ", repo="my-repo") + assert recorder.calls[0][1]["source_branch"] == "feat/detected" + + def test_pr_create_rejects_detached_head_autodetect( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """git_current_branch returns the literal 'HEAD' for both + detached and unborn state — Bitbucket would accept it silently + and create a degenerate PR. Surface a local error instead.""" + monkeypatch.setattr( + git_ops, "git_current_branch", + lambda path=None: "HEAD", + ) + # bb_ops.pr_create must NOT be called when source_branch can't + # be resolved cleanly. + recorder = _recorder({"id": 10}) + with patch.object(bb_ops, "pr_create", recorder): + out = mcp_server.pr_create(title="Hi", repo="my-repo") + assert out["ok"] is False + assert out["kind"] == "ValueError" + assert "HEAD" in out["message"] + # Identifier threading: title surfaces on the error path so + # parallel pr_create fan-outs can correlate. 
+ assert out["title"] == "Hi" + assert recorder.calls == [] # bb_ops.pr_create not reached + + def test_pr_create_rejects_explicit_head_source_branch( + self, + stub_client: bb_api.BBClient, + ) -> None: + """Round-3 finding: round-2 fix rejected 'HEAD' from auto-detect + but the user-supplied path forwarded it verbatim. The check + must apply to BOTH entry points.""" + recorder = _recorder({"id": 11}) + with patch.object(bb_ops, "pr_create", recorder): + out = mcp_server.pr_create( + title="Hi", source_branch="HEAD", repo="my-repo" + ) + assert out["ok"] is False + assert out["kind"] == "ValueError" + assert "HEAD" in out["message"] + assert recorder.calls == [] # bb_ops.pr_create not reached + + def test_pr_unapprove_dispatches( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder(None) + with patch.object(bb_ops, "pr_unapprove", recorder): + out = mcp_server.pr_unapprove(pr_id=42, repo="my-repo") + assert recorder.calls[0][0] == (stub_client, "acme", "my-repo", 42) + assert out["pr_id"] == 42 + + def test_pr_comment_add_shape( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder({"id": 99, "content": {"raw": "LGTM"}}) + with patch.object(bb_ops, "pr_comment_add", recorder): + out = mcp_server.pr_comment_add(pr_id=42, body="LGTM", repo="my-repo") + assert recorder.calls[0][0] == (stub_client, "acme", "my-repo", 42, "LGTM") + assert out["comment"]["id"] == 99 + + def test_pr_merge_empty_message_passes_none( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder({"state": "MERGED"}) + with patch.object(bb_ops, "pr_merge", recorder): + mcp_server.pr_merge(pr_id=42, repo="my-repo", message="") + assert recorder.calls[0][1]["message"] is None + + +class TestRepoTools: + def test_repos_list_uses_config_workspace_when_omitted( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder([]) + with patch.object(bb_ops, "repos_list", recorder): + mcp_server.repos_list() + # workspace= defaulted to 
client.config.workspace ("acme"). + assert recorder.calls[0][1]["workspace"] == "acme" + + def test_repos_list_explicit_workspace( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder([]) + with patch.object(bb_ops, "repos_list", recorder): + mcp_server.repos_list(workspace="other") + assert recorder.calls[0][1]["workspace"] == "other" + + def test_repos_list_strips_workspace_whitespace( + self, stub_client: bb_api.BBClient + ) -> None: + """' acme' / 'acme ' must not slip through and 404 on + `/repositories/%20acme`.""" + recorder = _recorder([]) + with patch.object(bb_ops, "repos_list", recorder): + mcp_server.repos_list(workspace=" other-org ") + assert recorder.calls[0][1]["workspace"] == "other-org" + + def test_repos_list_whitespace_only_workspace_falls_back( + self, stub_client: bb_api.BBClient + ) -> None: + """Whitespace-only workspace falls back to config workspace.""" + recorder = _recorder([]) + with patch.object(bb_ops, "repos_list", recorder): + mcp_server.repos_list(workspace=" ") + assert recorder.calls[0][1]["workspace"] == "acme" # from config + + def test_branch_show_passes_name(self, stub_client: bb_api.BBClient) -> None: + recorder = _recorder({"name": "feat/widget"}) + with patch.object(bb_ops, "branch_show", recorder): + out = mcp_server.branch_show(name="feat/widget", repo="my-repo") + assert recorder.calls[0][0] == (stub_client, "acme", "my-repo", "feat/widget") + assert out["name"] == "feat/widget" + + def test_commits_list_empty_branch_passes_none( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder([]) + with patch.object(bb_ops, "commits_list", recorder): + mcp_server.commits_list(repo="my-repo") + assert recorder.calls[0][1]["branch"] is None + + +class TestGitTools: + def test_git_current_branch_uses_default_path( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + recorder = _recorder("main") + with patch.object(git_ops, "git_current_branch", recorder): + 
out = mcp_server.git_current_branch() + # Default path from BB_DEFAULT_REPO_PATH env (set by _reset_state). + assert recorder.calls[0][1]["path"] == "/test/cwd" + assert out == {"ok": True, "path": "/test/cwd", "branch": "main"} + + def test_git_current_branch_explicit_path( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder("main") + with patch.object(git_ops, "git_current_branch", recorder): + mcp_server.git_current_branch(path="/explicit/dir") + assert recorder.calls[0][1]["path"] == "/explicit/dir" + + def test_git_status_payload_under_working_tree_key( + self, stub_client: bb_api.BBClient + ) -> None: + """Payload is keyed under `working_tree`, not `status`, to + avoid colliding with the HTTP-status field _error_dict uses + for BBApiError.""" + status = {"branch": "main", "clean": True} + with patch.object(git_ops, "git_status", _recorder(status)): + out = mcp_server.git_status() + assert out["working_tree"] == status + assert "status" not in out # no collision risk + + def test_git_recent_commits_passes_count_and_ref( + self, stub_client: bb_api.BBClient + ) -> None: + recorder = _recorder([{"sha": "a" * 40}]) + with patch.object(git_ops, "git_recent_commits", recorder): + mcp_server.git_recent_commits(count=5, ref="origin/main") + assert recorder.calls[0][1]["count"] == 5 + assert recorder.calls[0][1]["ref"] == "origin/main" + + def test_git_op_error_wrapped_in_error_dict( + self, stub_client: bb_api.BBClient + ) -> None: + def raise_git(*_args: Any, **_kwargs: Any) -> Any: + raise git_ops.GitOpError(["git", "status"], 128, "fatal: not a git repo") + + with patch.object(git_ops, "git_status", raise_git): + out = mcp_server.git_status() + assert out["ok"] is False + assert out["kind"] == "GitOpError" + assert out["returncode"] == 128 + + +# --------------------------------------------------------------------------- +# whoami +# --------------------------------------------------------------------------- + + +class TestWhoami: + """whoami 
has three phases — (1) config, (2) git context, (3) workspace + reachability via a single low-cost GET. Every test stubs phase (3)'s + HTTP layer so the suite stays hermetic — without the stub, an unpatched + BBClient.get would hit api.bitbucket.org for real.""" + + @staticmethod + def _stub_auth_ok(client: bb_api.BBClient) -> list[tuple[str, dict]]: + """Replace client.get with a recorder that returns success. + Returns the call-log so tests can assert the right endpoint + was hit with the right query.""" + calls: list[tuple[str, dict]] = [] + def fake_get(path: str, *, query=None, timeout=None): + calls.append((path, dict(query or {}))) + return {"slug": "acme", "type": "workspace"} + client.get = fake_get # type: ignore[method-assign] + return calls + + def test_reports_config_and_git_context( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + monkeypatch.setattr( + git_ops, "git_current_branch", + lambda path=None: "feat/test", + ) + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("acme", "widget-service"), + ) + calls = self._stub_auth_ok(stub_client) + out = mcp_server.whoami() + assert out["ok"] is True + assert out["user"] == "alice@example.com" + assert out["workspace"] == "acme" + assert out["git_branch"] == "feat/test" + assert out["git_workspace"] == "acme" + assert out["git_repo"] == "widget-service" + assert out["auth"] == {"ok": True} + # Reachability probe hit the right endpoint with the cheap pagelen. + assert calls == [("/repositories/acme", {"pagelen": "1"})] + # Token must NEVER be echoed. 
+ assert "tok-xyz" not in str(out) + assert "token" not in {k.lower() for k in out.keys()} + + def test_handles_git_failure_gracefully( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Running outside a git repo shouldn't flip ok=False — the + server is useful (config-reachable) even there.""" + def raise_git(*_args: Any, **_kwargs: Any) -> Any: + raise git_ops.GitOpError(["git"], 128, "not a git repo") + + monkeypatch.setattr(git_ops, "git_current_branch", raise_git) + monkeypatch.setattr(git_ops, "git_remote_repo", raise_git) + self._stub_auth_ok(stub_client) + out = mcp_server.whoami() + assert out["ok"] is True # config still loaded + # The autouse fixture sets BB_DEFAULT_REPO_PATH so cwd resolves + # cleanly — both git probes run and both capture their failures. + assert "git_branch_error" in out + assert "git_remote_error" in out + + def test_cwd_error_skips_git_probes( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """When _default_repo_path raises (e.g. cwd was deleted out from + under the process), the per-probe git calls must be SKIPPED — not + called with path=None and not silently swallowed. cwd_error + captures the failure; no git_branch_error / git_remote_error + keys are written.""" + def raise_cwd() -> str: + raise OSError("[Errno 2] No such file or directory") + + monkeypatch.setattr(mcp_server, "_default_repo_path", raise_cwd) + # Tripwires — if either gets called we want a loud test failure, + # not a silent pass. 
+ def boom(*_a: Any, **_k: Any) -> Any: + raise AssertionError("git probe ran despite cwd_error") + monkeypatch.setattr(git_ops, "git_current_branch", boom) + monkeypatch.setattr(git_ops, "git_remote_repo", boom) + self._stub_auth_ok(stub_client) + out = mcp_server.whoami() + assert out["ok"] is True + assert "cwd_error" in out + assert "git_branch_error" not in out + assert "git_remote_error" not in out + assert "cwd" not in out + # Phase 3 still runs — auth is independent of cwd. + assert out["auth"] == {"ok": True} + + def test_config_error_flips_ok_false( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """If config is missing, ok=False AND we still surface git context + (best-effort) so the user can debug. The auth probe is skipped + (no client to probe with) so out['auth'] is absent.""" + mcp_server._reset_client_cache() + def raise_config(*_args: Any, **_kwargs: Any) -> Any: + raise bb_api.BBConfigError("Missing BB_USER") + + monkeypatch.setattr(bb_api, "load_config", raise_config) + monkeypatch.setattr(git_ops, "git_current_branch", lambda path=None: "main") + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("acme", "widget-service"), + ) + out = mcp_server.whoami() + assert out["ok"] is False + assert out["kind"] == "BBConfigError" + assert "BB_USER" in out["message"] + # Still reports git context. + assert out["git_branch"] == "main" + # Auth probe MUST be skipped when there's no client — never let + # a None-deref slip in by refactor. + assert "auth" not in out + + def test_auth_probe_failure_does_not_flip_ok( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """A 401 from the workspace endpoint means the token is invalid + for THIS workspace. 
Surface it as out['auth']['ok']=False but + keep the outer ok=True — config + git context are still useful + for debugging the credential.""" + monkeypatch.setattr(git_ops, "git_current_branch", lambda path=None: "main") + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("acme", "widget-service"), + ) + + def fake_get(path: str, *, query=None, timeout=None): + raise bb_api.BBApiError( + status=401, + url="https://api.bitbucket.org/2.0/repositories/acme?pagelen=1", + body="", + ) + + stub_client.get = fake_get # type: ignore[method-assign] + out = mcp_server.whoami() + assert out["ok"] is True # outer call still ok + assert out["auth"]["ok"] is False + assert out["auth"]["kind"] == "BBApiError" + assert out["auth"]["status"] == 401 + # Token must NEVER be echoed, even on the auth-failure path. + assert "tok-xyz" not in str(out) + + def test_auth_probe_url_encodes_workspace( + self, + stub_client: bb_api.BBClient, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """If the workspace slug has a character that needs URL + encoding (rare, but `/` would break path parsing), the probe + must encode it — bash uses raw curl, Python uses urllib.parse.quote + with safe=''. Test the encoding rather than the raw substitution.""" + # BBConfig is frozen; swap the whole client in. + weird_cfg = bb_api.BBConfig( + user="alice@example.com", + token="tok-xyz", + workspace="ws/with-slash", + api_base=bb_api.DEFAULT_API_BASE, + ) + stub_client = bb_api.BBClient(weird_cfg) + monkeypatch.setattr(mcp_server, "_client_cache", stub_client) + monkeypatch.setattr(git_ops, "git_current_branch", lambda path=None: "main") + monkeypatch.setattr( + git_ops, "git_remote_repo", + lambda path=None: ("acme", "widget-service"), + ) + calls = self._stub_auth_ok(stub_client) + out = mcp_server.whoami() + assert out["ok"] is True + # `/` must be encoded as %2F so it doesn't fragment the path. 
+ assert calls == [("/repositories/ws%2Fwith-slash", {"pagelen": "1"})] + + +# --------------------------------------------------------------------------- +# Bootstrap stub +# --------------------------------------------------------------------------- + + +class TestBootstrapStub: + def test_fastmcp_stub_run_raises(self) -> None: + """The stub MCP must never accidentally serve in production — + .run() raises a clear error so a test that imports mcp_server + and calls .run() fails loud instead of hanging on stdio.""" + with pytest.raises(RuntimeError, match="BB_MCP_SKIP_BOOTSTRAP"): + mcp_server.mcp.run() + + def test_skip_bootstrap_env_is_set(self) -> None: + """conftest.py sets BB_MCP_SKIP_BOOTSTRAP=1 unconditionally; pin + that we actually loaded the stub path (not the real FastMCP).""" + import os as _os + assert _os.environ.get("BB_MCP_SKIP_BOOTSTRAP") == "1" + assert mcp_server._MCP_SKIP_BOOTSTRAP is True + + +class TestVenvLocation: + """Pin the durable XDG-spec venv location so a regression to the + old /tmp/bbenv path (which gets wiped at every boot, forcing a + rebuild) doesn't slip through. Mirrors the zenhub-cli pattern.""" + + def test_venv_dir_is_under_xdg_data_home(self) -> None: + """The venv path must end in `bitbucket-cli/venv` so it lives + alongside other XDG-spec app state.""" + from pathlib import Path + assert mcp_server._VENV_DIR.parts[-2:] == ("bitbucket-cli", "venv") + # NOT /tmp (which would re-bootstrap every reboot). + assert not str(mcp_server._VENV_DIR).startswith("/tmp") + + def test_xdg_data_home_env_var_honoured(self, monkeypatch: pytest.MonkeyPatch) -> None: + """`XDG_DATA_HOME=/custom/path` must place the venv at + `/custom/path/bitbucket-cli/venv` — per the XDG Base Dir spec. 
+ Calls the helper directly so the test doesn't need to reload + the module.""" + from pathlib import Path + monkeypatch.setenv("XDG_DATA_HOME", "/custom/xdg") + assert mcp_server._xdg_data_home() == Path("/custom/xdg") + + def test_xdg_data_home_default_under_home( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + """When XDG_DATA_HOME is unset, fall back to ~/.local/share.""" + from pathlib import Path + monkeypatch.delenv("XDG_DATA_HOME", raising=False) + assert mcp_server._xdg_data_home() == Path.home() / ".local" / "share" + + def test_venv_ready_sentinel_inside_venv_dir(self) -> None: + """The ready-sentinel must live INSIDE the venv dir so removing + the whole venv (`rm -rf $venv_dir`) also removes the sentinel + — otherwise a stale sentinel could survive and claim a missing + venv is ready.""" + assert mcp_server._VENV_READY.parent == mcp_server._VENV_DIR