diff --git a/api/README.md b/api/README.md
index 39bb7ec..571eb01 100644
--- a/api/README.md
+++ b/api/README.md
@@ -18,8 +18,12 @@ Import as `temporal_model.api`. Depends on `temporal-model-core`.
   `version` is `{api, model}` — the code release (== the Docker image tag,
   `null` on non-release builds) and the packaged model release.
   `POST /predict?verbose=true` adds a `details` block (decision, preprocessing,
-  per-tube tracks). See `docs/specs/2026-06-02-api-service-design.md` for the
-  full contract.
+  per-tube tracks). `POST /predict?compute_trigger=true` runs the
+  first-crossing search (extra classifier work, off by default) and adds a
+  top-level `trigger_frame_index` (`null` if nothing crossed) — with
+  `verbose=true` it also fills `details.decision.trigger_tube_id` and
+  per-tube `details.tubes[].first_crossing_frame`. See
+  `docs/specs/2026-06-02-api-service-design.md` for the full contract.
 
 ## Run
 
diff --git a/api/src/temporal_model/api/app.py b/api/src/temporal_model/api/app.py
index 6a23689..45a7ab7 100644
--- a/api/src/temporal_model/api/app.py
+++ b/api/src/temporal_model/api/app.py
@@ -127,7 +127,10 @@ def health(request: Request) -> HealthResponse:
     dependencies=[Depends(require_token)],
 )
 async def predict(
-    body: PredictRequest, request: Request, verbose: bool = False
+    body: PredictRequest,
+    request: Request,
+    verbose: bool = False,
+    compute_trigger: bool = False,
 ) -> PredictResponse:
     bucket = body.bucket or settings.s3_bucket
     if not bucket:
@@ -154,7 +157,11 @@ async def predict(
                 )
 
             out = await runner.predict(
-                paths, roi=body.roi_xyxyn, timer=timer, profile=profile
+                paths,
+                roi=body.roi_xyxyn,
+                timer=timer,
+                profile=profile,
+                compute_trigger=compute_trigger,
             )
 
             profiling = None
@@ -173,6 +180,7 @@ async def predict(
                 model_version=runner.version,
                 calibrated=runner.calibrated,
                 verbose=verbose,
+                compute_trigger=compute_trigger,
                 threshold_overridden=runner.threshold_overridden,
                 packaged_threshold=runner.packaged_threshold,
                 profiling=profiling,
diff --git a/api/src/temporal_model/api/model_runner.py b/api/src/temporal_model/api/model_runner.py
index d2ba875..9796e1f 100644
--- a/api/src/temporal_model/api/model_runner.py
+++ b/api/src/temporal_model/api/model_runner.py
@@ -124,6 +124,7 @@ async def predict(
         roi: tuple[float, float, float, float] | None = None,
         timer: StageTimer | None = None,
         profile: dict[str, Any] | None = None,
+        compute_trigger: bool = False,
     ) -> Any:
         """Resolve detections (cache + detect misses) then run the model.
 
@@ -135,7 +136,7 @@ async def predict(
         """
         async with self._lock:
             return await run_in_threadpool(
-                self._predict_sync, frame_paths, roi, timer, profile
+                self._predict_sync, frame_paths, roi, timer, profile, compute_trigger
             )
 
     def _predict_sync(
@@ -144,6 +145,7 @@ def _predict_sync(
         roi: tuple[float, float, float, float] | None = None,
         timer: StageTimer | None = None,
         profile: dict[str, Any] | None = None,
+        compute_trigger: bool = False,
     ) -> Any:
         started = time.perf_counter()
         frames = self._model.load_sequence(frame_paths)
@@ -160,7 +162,11 @@ def _predict_sync(
             self._cache.put(fd.frame_id, fd)
             resolved[fd.frame_id] = fd
         out = self._model.predict(
-            frames, frame_detections=resolved, roi=roi, timer=timer
+            frames,
+            frame_detections=resolved,
+            roi=roi,
+            timer=timer,
+            compute_trigger=compute_trigger,
         )
         if profile is not None:
             profile["n_frames"] = len(frames)
diff --git a/api/src/temporal_model/api/schemas.py b/api/src/temporal_model/api/schemas.py
index e5996ca..e5f9a9f 100644
--- a/api/src/temporal_model/api/schemas.py
+++ b/api/src/temporal_model/api/schemas.py
@@ -1,8 +1,10 @@
 """Public request/response DTOs and the mapper from the core model output.
 
 The default response is the lean verdict; ``?verbose=true`` adds a ``details``
-block. ``details`` is only set when verbose, so the route serializes with
-``exclude_unset=True`` to omit it otherwise (while keeping explicit ``null``s).
+block and ``?compute_trigger=true`` adds the time-to-detection fields. Both
+are only set when requested, so the route serializes with
+``exclude_unset=True`` to omit them otherwise (while keeping explicit
+``null``s).
 """
 
 import re
@@ -87,6 +89,7 @@ class Tube(BaseModel):
     end_frame: int
     logit: float
     probability: float | None
+    first_crossing_frame: int | None = None
     entries: list[FrameEntry]
 
 
@@ -95,6 +98,7 @@ class Decision(BaseModel):
     threshold: float
     threshold_overridden: bool = False
     packaged_threshold: float | None = None
+    trigger_tube_id: int | None = None
 
 
 class Preprocessing(BaseModel):
@@ -127,6 +131,7 @@ class Version(BaseModel):
 class PredictResponse(BaseModel):
     is_smoke: bool
     probability: float | None
+    trigger_frame_index: int | None = None
     version: Version
     details: Details | None = None
 
@@ -150,12 +155,23 @@ def _to_details(
     threshold_overridden: bool,
     packaged_threshold: float | None,
     profiling: dict[str, Any] | None = None,
+    compute_trigger: bool = False,
 ) -> Details:
     tubes_block = details["tubes"]
     pre = details["preprocessing"]
+    decision = dict(details["decision"])
+    kept = tubes_block["kept"]
+    if not compute_trigger:
+        # Core emits these keys even on the fast path (always null there);
+        # dropping them keeps the DTO fields unset so exclude_unset omits
+        # them and the no-flag response is unchanged.
+        decision.pop("trigger_tube_id", None)
+        kept = [
+            {k: v for k, v in t.items() if k != "first_crossing_frame"} for t in kept
+        ]
     return Details(
         decision=Decision(
-            **details["decision"],
+            **decision,
             threshold_overridden=threshold_overridden,
             packaged_threshold=packaged_threshold,
         ),
@@ -168,7 +184,7 @@ def _to_details(
             # always emits the key; a silent 0 here would mask a core rename.
             num_tubes_outside_roi=tubes_block["num_outside_roi"],
         ),
-        tubes=[Tube(**t) for t in tubes_block["kept"]],
+        tubes=[Tube(**t) for t in kept],
         profiling=profiling,
     )
 
@@ -180,6 +196,7 @@ def to_response(
     model_version: str | None,
     calibrated: bool,
     verbose: bool,
+    compute_trigger: bool = False,
     threshold_overridden: bool = False,
     packaged_threshold: float | None = None,
     profiling: dict[str, Any] | None = None,
@@ -190,11 +207,15 @@ def to_response(
         "probability": _decision_probability(out.details, calibrated),
         "version": Version(api=api_version, model=model_version),
     }
+    if compute_trigger:
+        # Explicit null is meaningful here: searched, no crossing found.
+        kwargs["trigger_frame_index"] = out.trigger_frame_index
     if verbose:
         kwargs["details"] = _to_details(
             out.details,
             threshold_overridden=threshold_overridden,
             packaged_threshold=packaged_threshold,
             profiling=profiling,
+            compute_trigger=compute_trigger,
         )
     return PredictResponse(**kwargs)
diff --git a/api/tests/test_app.py b/api/tests/test_app.py
index d0fa06e..7dbaa09 100644
--- a/api/tests/test_app.py
+++ b/api/tests/test_app.py
@@ -66,9 +66,13 @@ def __init__(self, output=None, error=None):
         self._output = output
         self._error = error
         self.roi = None
+        self.compute_trigger = None
 
-    async def predict(self, paths, *, roi=None, timer=None, profile=None):
+    async def predict(
+        self, paths, *, roi=None, timer=None, profile=None, compute_trigger=False
+    ):
         self.roi = roi
+        self.compute_trigger = compute_trigger
         if self._error:
             raise self._error
         if timer is not None:
@@ -212,6 +216,34 @@ def test_predict_verbose_surfaces_override(client):
     assert decision["packaged_threshold"] == 0.5
 
 
+def test_predict_compute_trigger_returns_trigger_frame_index(client):
+    r = client.post("/predict?compute_trigger=true", json={"frames": KEYS})
+    assert r.status_code == 200
+    assert r.json() == {
+        "is_smoke": True,
+        "probability": 0.98,
+        "trigger_frame_index": 3,
+        "version": {"api": None, "model": "1.2.0"},
+    }
+    assert client.app.state.runner.compute_trigger is True
+
+
+def test_predict_default_runs_fast_path(client):
+    r = client.post("/predict", json={"frames": KEYS})
+    assert r.status_code == 200
+    assert "trigger_frame_index" not in r.json()
+    assert client.app.state.runner.compute_trigger is False
+
+
+def test_predict_compute_trigger_verbose_adds_trigger_details(client):
+    r = client.post("/predict?compute_trigger=true&verbose=true", json={"frames": KEYS})
+    body = r.json()
+    assert r.status_code == 200
+    assert body["trigger_frame_index"] == 3
+    assert body["details"]["decision"]["trigger_tube_id"] == 7
+    assert body["details"]["tubes"][0]["first_crossing_frame"] == 3
+
+
 def test_predict_reports_api_version(client, monkeypatch):
     # settings.api_version is read per request, so a monkeypatched value
     # must show up as version.api.
diff --git a/api/tests/test_model_runner.py b/api/tests/test_model_runner.py
index a30f036..52685d3 100644
--- a/api/tests/test_model_runner.py
+++ b/api/tests/test_model_runner.py
@@ -121,6 +121,7 @@ def __init__(self):
         self.detect_calls: list[list[str]] = []
         self.predict_calls: list[set[str]] = []
         self.roi_calls: list[tuple | None] = []
+        self.trigger_calls: list[bool] = []
 
     def load_sequence(self, paths):
         return [
@@ -137,9 +138,18 @@ def detect(self, frames):
             for i, f in enumerate(frames)
         ]
 
-    def predict(self, frames, *, frame_detections=None, roi=None, timer=None):
+    def predict(
+        self,
+        frames,
+        *,
+        frame_detections=None,
+        roi=None,
+        timer=None,
+        compute_trigger=False,
+    ):
         self.predict_calls.append(set(frame_detections or {}))
         self.roi_calls.append(roi)
+        self.trigger_calls.append(compute_trigger)
         return SimpleNamespace(frame_ids=[f.frame_id for f in frames])
 
 
@@ -167,6 +177,20 @@ def test_predict_roi_defaults_to_none():
     assert model.roi_calls[-1] is None
 
 
+def test_predict_threads_compute_trigger_to_model():
+    model = _OrchestrationModel()
+    runner = ModelRunner(model, name="m", version="1", calibrated=True)
+    asyncio.run(runner.predict(["c/x_00.jpg"], compute_trigger=True))
+    assert model.trigger_calls[-1] is True
+
+
+def test_predict_compute_trigger_defaults_to_false():
+    model = _OrchestrationModel()
+    runner = ModelRunner(model, name="m", version="1", calibrated=True)
+    asyncio.run(runner.predict(["c/x_00.jpg"]))
+    assert model.trigger_calls[-1] is False
+
+
 def test_predict_caches_and_reuses_detections():
     model = _OrchestrationModel()
     runner = ModelRunner(
@@ -208,7 +232,15 @@ def load_sequence(self, paths):
     def detect(self, misses):
         return [SimpleNamespace(frame_id=f.frame_id) for f in misses]
 
-    def predict(self, frames, *, frame_detections=None, roi=None, timer=None):
+    def predict(
+        self,
+        frames,
+        *,
+        frame_detections=None,
+        roi=None,
+        timer=None,
+        compute_trigger=False,
+    ):
         self.predict_timer = timer
         if timer is not None:
             with timer.stage("classifier"):
diff --git a/api/tests/test_schemas.py b/api/tests/test_schemas.py
index 1e4024f..83eee83 100644
--- a/api/tests/test_schemas.py
+++ b/api/tests/test_schemas.py
@@ -302,3 +302,78 @@ def test_verbose_details_num_tubes_outside_roi_is_strict():
         to_response(
             out, api_version=None, model_version="1", calibrated=True, verbose=True
         )
+
+
+def test_compute_trigger_sets_top_level_trigger_frame_index():
+    out = SimpleNamespace(
+        is_positive=True, trigger_frame_index=3, details=_details([_tube(7, 0.98)])
+    )
+    resp = to_response(
+        out,
+        api_version=None,
+        model_version="1.2.0",
+        calibrated=True,
+        verbose=False,
+        compute_trigger=True,
+    )
+    dumped = resp.model_dump(exclude_unset=True)
+    assert dumped["trigger_frame_index"] == 3
+    assert "details" not in dumped
+
+
+def test_compute_trigger_no_crossing_is_explicit_null():
+    # Searched but nothing crossed: the key is present with an explicit null.
+    out = SimpleNamespace(
+        is_positive=False, trigger_frame_index=None, details=_details([])
+    )
+    resp = to_response(
+        out,
+        api_version=None,
+        model_version="1.2.0",
+        calibrated=True,
+        verbose=False,
+        compute_trigger=True,
+    )
+    dumped = resp.model_dump(exclude_unset=True)
+    assert "trigger_frame_index" in dumped
+    assert dumped["trigger_frame_index"] is None
+
+
+def test_default_omits_trigger_frame_index():
+    # Even when the core output carries a trigger, the flag gates exposure.
+    out = SimpleNamespace(
+        is_positive=True, trigger_frame_index=3, details=_details([_tube(7, 0.98)])
+    )
+    resp = to_response(
+        out, api_version=None, model_version="1.2.0", calibrated=True, verbose=False
+    )
+    assert "trigger_frame_index" not in resp.model_dump(exclude_unset=True)
+
+
+def test_compute_trigger_verbose_adds_trigger_details():
+    out = SimpleNamespace(
+        is_positive=True, trigger_frame_index=3, details=_details([_tube(7, 0.98)])
+    )
+    resp = to_response(
+        out,
+        api_version=None,
+        model_version="1.2.0",
+        calibrated=True,
+        verbose=True,
+        compute_trigger=True,
+    )
+    details = resp.model_dump(exclude_unset=True)["details"]
+    assert details["decision"]["trigger_tube_id"] == 7
+    assert details["tubes"][0]["first_crossing_frame"] == 3
+
+
+def test_verbose_without_compute_trigger_omits_trigger_details():
+    out = SimpleNamespace(
+        is_positive=True, trigger_frame_index=3, details=_details([_tube(7, 0.98)])
+    )
+    resp = to_response(
+        out, api_version=None, model_version="1.2.0", calibrated=True, verbose=True
+    )
+    details = resp.model_dump(exclude_unset=True)["details"]
+    assert "trigger_tube_id" not in details["decision"]
+    assert "first_crossing_frame" not in details["tubes"][0]
diff --git a/docs/specs/2026-06-11-api-compute-trigger-design.md b/docs/specs/2026-06-11-api-compute-trigger-design.md
new file mode 100644
index 0000000..38e0fdd
--- /dev/null
+++ b/docs/specs/2026-06-11-api-compute-trigger-design.md
@@ -0,0 +1,91 @@
+# API: Explicit `?compute_trigger=true` Flag for Time-to-Detection
+
+**Date:** 2026-06-11
+**Status:** Approved
+**Refs:** issue #26, PR #25, issue #23
+
+## Motivation
+
+PR #25 made the trigger / first-crossing search eval-only: the serving path
+runs `predict(..., compute_trigger=False)` and the always-null trigger fields
+(`trigger_frame_index`, `trigger_tube_id`, per-tube `first_crossing_frame`)
+were dropped from the `/predict` response. The API therefore never computes
+time-to-detection (TTD).
+
+We deliberately did not couple TTD to `?verbose=true`: `verbose` only exposes
+already-computed details and must not silently run the O(tube_length)
+classifier loop (~34% of CPU latency). When an HTTP consumer needs TTD, it
+opts in explicitly — the param name signals "do extra work".
+
+## Decisions (agreed in brainstorming)
+
+1. **Explicit query param.** `POST /predict?compute_trigger=true`, default
+   `false`. Same FastAPI bool-param pattern as `verbose`, orthogonal to it.
+2. **Plumbing only — core already supports it.**
+   `BboxTubeTemporalModel.predict` already takes `compute_trigger` and
+   populates `TemporalModelOutput.trigger_frame_index`,
+   `decision.trigger_tube_id` and per-tube `first_crossing_frame` in the core
+   details. The API threads the flag through
+   `ModelRunner.predict` → `_predict_sync` → `model.predict`. (The ticket's
+   `predict_sequence` step is obsolete; that function was refactored away.)
+3. **Omit-when-unset serialization.** The route already serializes with
+   `response_model_exclude_unset=True`. Trigger fields are added back to the
+   DTOs as optional fields but only *set* when `compute_trigger=true`, so the
+   default response stays byte-identical. (Always-present nullable fields
+   were rejected: they change the default response, violating the
+   acceptance criterion.)
+4. **Explicit `null` means "searched, no crossing".** When
+   `compute_trigger=true` and the search finds no crossing (e.g. a negative
+   decision), `trigger_frame_index` serializes as an explicit `null` — same
+   convention the response already uses for `probability` (null carries
+   meaning, absence means "not computed").
+
+## API contract
+
+| Query params | Response |
+|---|---|
+| (default) | Unchanged fast path; no trigger fields anywhere. |
+| `compute_trigger=true` | Top-level `trigger_frame_index` (int or null). |
+| `compute_trigger=true&verbose=true` | Also `details.decision.trigger_tube_id` and per-tube `details.tubes[].first_crossing_frame`. |
+| `verbose=true` only | `details` as today, still no trigger fields. |
+
+DTO changes (`api/src/temporal_model/api/schemas.py`):
+
+- `PredictResponse.trigger_frame_index: int | None` — unset unless the flag
+  is on.
+- `Decision.trigger_tube_id: int | None` and
+  `Tube.first_crossing_frame: int | None` — unset unless the flag is on.
+  The core details dict already carries these keys (pydantic's default
+  extra-key handling currently ignores them); `_to_details` gains a
+  `compute_trigger` flag to forward them.
+
+## Components
+
+- `api/src/temporal_model/api/app.py` — `compute_trigger: bool = False`
+  route param; forwarded to `runner.predict(...)` and `to_response(...)`.
+- `api/src/temporal_model/api/model_runner.py` — `compute_trigger`
+  keyword threaded through `predict` and `_predict_sync` to
+  `self._model.predict`.
+- `api/src/temporal_model/api/schemas.py` — DTO fields above;
+  `to_response`/`_to_details` gate the fields on the flag.
+- `api/README.md` — document the param in the `/predict` section.
+
+No core changes.
+
+## Testing
+
+`api/tests/test_app.py`, existing `FakeRunner` pattern:
+
+- Default response contains no trigger keys (byte-compatible with today).
+- The flag is forwarded to `runner.predict` (FakeRunner records kwargs).
+- `compute_trigger=true` → `trigger_frame_index` present and equal to the
+  core output's value; explicit `null` when the core reports none.
+- `compute_trigger=true&verbose=true` → `decision.trigger_tube_id` and
+  `tubes[].first_crossing_frame` populated from the core details.
+- `verbose=true` alone → details present but without trigger fields.
+
+## Acceptance (from issue #26)
+
+- Default response unchanged (no trigger fields, fast path).
+- `?compute_trigger=true` returns a populated `trigger_frame_index` and
+  verbose trigger fields, matching `find_first_crossing_trigger`.