From eea518c32ea6b73a75b97bf2061814a71b0fba3e Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:28:56 +0200 Subject: [PATCH 01/12] docs: spec for caller-supplied detections on /predict (detector bypass) --- ...26-06-11-api-supplied-detections-design.md | 202 ++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 docs/specs/2026-06-11-api-supplied-detections-design.md diff --git a/docs/specs/2026-06-11-api-supplied-detections-design.md b/docs/specs/2026-06-11-api-supplied-detections-design.md new file mode 100644 index 0000000..1cf67a2 --- /dev/null +++ b/docs/specs/2026-06-11-api-supplied-detections-design.md @@ -0,0 +1,202 @@ +# API: Caller-Supplied Detections on `/predict` (Detector Bypass) + +**Date:** 2026-06-11 +**Status:** Approved + +## Motivation + +The Pyronear edge devices (RPis running pyro-engine) already run a YOLO +detector on every frame and ship the resulting bboxes with their alerts. When +the platform calls `/predict` to get a temporal verdict on an alert sequence, +the API re-runs its own bundled YOLO on the same frames — a redundant GPU pass +that adds latency and compute. This feature lets the caller supply the +per-frame bboxes it already holds; the API skips the detector stage and feeds +the supplied boxes straight into tube building. Everything downstream — +tube building, ROI filtering, cropping, classifier scoring, calibration, +decision — runs unchanged. + +## Decisions (agreed in brainstorming) + +1. **All-or-nothing bypass with explicit empties.** If the request carries + `detections`, the bundled detector never runs — there is no hybrid mode + that detects "uncovered" frames. A frame with no detections is expressed + as an explicit empty list, meaning "a detector ran and saw nothing"; such + frames participate in tube gap handling exactly as if the bundled detector + had returned nothing. +2. 
**Index-aligned list shape.** `detections` is a list with exactly one + entry per frame, in the same order as `frames`. Length mismatch is a + validation error. This makes "no detections for frame i" unambiguous + (empty inner list) with trivial validation — no omitted-key rules a + dict-keyed shape would need. +3. **`xyxyn` + `confidence` inner objects.** Boxes arrive as normalized + corners — the convention pyro-engine produces and the platform stores, and + the same ultralytics vocabulary as the existing `roi_xyxyn` field. The API + converts to the internal center-based `xywhn` (`Detection` dataclass) at + the boundary. `class_id` is not exposed; supplied boxes are smoke + (`class_id=0`) by definition. +4. **Detection cache fully bypassed — no read, no write.** Supplied + detections are fresher truth than the cache, and writing them would poison + the shared LRU with a foreign detector's outputs for subsequent + detector-path requests. This extends the ROI spec's cache invariant: + only full-frame detections **produced by the bundled detector** ever enter + the cache. +5. **Confidences are trusted, not re-thresholded.** The edge detector already + applied its own confidence threshold; the API validates ranges only and + does not apply the packaged `infer.confidence_threshold` to supplied + boxes. +6. **Provenance in verbose details.** `details.preprocessing` gains + `detections_source: "request" | "detector"` so a logged response shows + which path produced the tubes. + +## API contract + +### Request + +New optional field on `PredictRequest`: + +```json +{ + "frames": ["org/img_t0.jpg", "org/img_t1.jpg", "org/img_t2.jpg"], + "detections": [ + [ {"xyxyn": [0.41, 0.30, 0.47, 0.36], "confidence": 0.62} ], + [], + [ {"xyxyn": [0.42, 0.29, 0.49, 0.36], "confidence": 0.71}, + {"xyxyn": [0.10, 0.50, 0.15, 0.55], "confidence": 0.33} ] + ] +} +``` + +Composes with `bucket`, `roi_xyxyn`, and `?verbose=true` unchanged. 
+ +Validation (Pydantic, fails as `400 invalid_request` via the existing +`RequestValidationError` handler): + +- `len(detections) == len(frames)` (model-level validator, checked after + field validation). +- Each box: `xyxyn` is exactly 4 floats, each in `[0, 1]` inclusive, with + `x_min < x_max` and `y_min < y_max` — same rules and fail-closed rationale + as `roi_xyxyn` (zero-area and inverted boxes rejected; catches most + accidental `xywhn` input). +- `confidence` in `[0, 1]` inclusive. +- `detections` omitted or `null` → exactly today's behavior: cache + bundled + detector, byte-identical non-verbose responses. + +```python +class SuppliedDetection(BaseModel): + xyxyn: tuple[float, float, float, float] + confidence: float = Field(ge=0.0, le=1.0) + + @field_validator("xyxyn") + @classmethod + def _validate_xyxyn(cls, v): ... # same checks as roi_xyxyn + + +class PredictRequest(BaseModel): + frames: list[str] + bucket: str | None = None + roi_xyxyn: tuple[float, float, float, float] | None = None + detections: list[list[SuppliedDetection]] | None = None + + @model_validator(mode="after") + def _detections_match_frames(self): ... # length check +``` + +### Response + +- Top-level shape unchanged (`is_smoke`, `probability`, `model`). +- Verbose `details.preprocessing` gains + `detections_source: Literal["request", "detector"]`. Populated at the API + layer (the app knows whether the request carried `detections`); core + details are untouched. +- Verbose `details.profiling`: the `detector` stage is absent from stage + timings (it never ran); `cache_hits`/`cache_misses` are reported as `0`/`0` + (the cache was not consulted). 
+ +### Conversion at the boundary + +Each supplied box maps to the internal `Detection`: + +``` +cx = (x_min + x_max) / 2; cy = (y_min + y_max) / 2 +w = x_max - x_min; h = y_max - y_min +class_id = 0; confidence = confidence +``` + +## API plumbing + +All changes live in the API layer; **core is untouched** — +`BboxTubeTemporalModel.predict()` already accepts a complete +`frame_detections` dict and `_resolve_frame_detections()` only detects +frames missing from it (`run_yolo_on_frames([])` early-returns). + +- `app.predict()` passes `body.detections` to + `ModelRunner.predict(detections=...)`, and threads + `detections_source` into `to_response()` → `_to_details()`. +- `ModelRunner._predict_sync()` branches after `load_sequence()`: + - `detections is None` → existing path (cache lookup, `detect()` on + misses, cache write-back). + - `detections` supplied → build + `resolved = {frames[i].frame_id: FrameDetections(frame_idx=i, + frame_id=frames[i].frame_id, timestamp=frames[i].timestamp, + detections=[converted boxes])}` and call + `self._model.predict(frames, frame_detections=resolved, roi=roi, + timer=timer)` directly. No cache read, no cache write, no `detect()` + call, no `detector` stage timing. +- Frames are still fetched from S3 — the classifier needs the image crops; + only the detector pass is skipped. +- Index alignment holds end to end: request `frames[i]` → + `frame_paths[i]` → `load_sequence` `frames[i]` (order-preserving) → + `detections[i]`. + +### Edge cases + +- **Truncation/padding:** core truncates to `max_frames` and may pad; + both look detections up by `frame_id` in the resolved dict, so supplied + entries for truncated-away frames are simply unused and padded duplicate + frames resolve to their original frame's detections — identical to the + cache path today. +- **Duplicate frame basenames** collapse in the `frame_id`-keyed dict + (last wins). This is pre-existing behavior on the cache path, not new. 
+ +## Documented risk (out of scope to validate) + +The temporal classifier and calibrator were trained on tubes built from the +bundled detector's boxes (`yolo11s_nimble-narwhal`). Edge-detector boxes may +differ in tightness, confidence distribution, and threshold; tubes built from +them may shift crop geometry and calibration. The classifier scores image +crops, not box metadata, so the mechanism is expected to work — but +calibration on real RPi boxes is unvalidated. Validation happens at +platform-integration time, not in this work. + +## Testing + +**API** (`api/tests/`): + +- Validation matrix: length mismatch with `frames`; coordinate out of + `[0, 1]`; `x_min >= x_max`; `y_min >= y_max`; wrong tuple length; + confidence out of `[0, 1]` → 400 with message. +- Bypass proof: with `detections` supplied, the model's `detect()` is never + called (mock/monkeypatch assertion) and the cache is neither read nor + written. +- Equivalence: supplying the exact boxes the bundled detector would produce + (xywhn → xyxyn round-trip) yields the same verdict and tubes as the + detector path. +- Explicit empties: all-empty `detections` → no tubes → + `is_smoke: false`, `probability: 0.0` (calibrated). +- Conversion: a known `xyxyn` box arrives at the core model as the expected + `(cx, cy, w, h)` `Detection` with `class_id=0`. +- Composition: `detections` + `roi_xyxyn` filters tubes as usual. +- Verbose: `detections_source` is `"request"` when supplied, `"detector"` + otherwise; profiling shows no `detector` stage when bypassed. +- `detections: null` / omitted → identical behavior to today (regression). + +**Core**: no changes, no new tests. + +## Out of scope + +- Hybrid mode (detect only uncovered frames) — mixes two detectors' outputs + in one tube; rejected in brainstorming. +- Calibration/accuracy validation of foreign detector boxes (see Documented + risk). +- Exposing `class_id` or other detector metadata in the request. 
+- Caching supplied detections (would poison the bundled-detector cache). From d98d34222d6b38ca2615b602789f7bb1bc93ccab Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:33:44 +0200 Subject: [PATCH 02/12] docs: make full-coverage requirement explicit in supplied-detections spec --- ...26-06-11-api-supplied-detections-design.md | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/docs/specs/2026-06-11-api-supplied-detections-design.md b/docs/specs/2026-06-11-api-supplied-detections-design.md index 1cf67a2..77c9307 100644 --- a/docs/specs/2026-06-11-api-supplied-detections-design.md +++ b/docs/specs/2026-06-11-api-supplied-detections-design.md @@ -23,10 +23,12 @@ decision — runs unchanged. as an explicit empty list, meaning "a detector ran and saw nothing"; such frames participate in tube gap handling exactly as if the bundled detector had returned nothing. -2. **Index-aligned list shape.** `detections` is a list with exactly one - entry per frame, in the same order as `frames`. Length mismatch is a - validation error. This makes "no detections for frame i" unambiguous - (empty inner list) with trivial validation — no omitted-key rules a +2. **Index-aligned list shape — every frame must be covered.** `detections` + is a list with exactly one entry per frame, in the same order as + `frames`. Missing detections for any frame is an error: a shorter (or + longer) list fails the length check, and `null` entries are rejected by + the schema — the *only* way to express "no detections for frame i" is an + explicit empty list. No partial coverage, no omitted-key rules a dict-keyed shape would need. 3. 
**`xyxyn` + `confidence` inner objects.** Boxes arrive as normalized corners — the convention pyro-engine produces and the platform stores, and @@ -72,7 +74,11 @@ Validation (Pydantic, fails as `400 invalid_request` via the existing `RequestValidationError` handler): - `len(detections) == len(frames)` (model-level validator, checked after - field validation). + field validation) — every frame must have an entry; partial coverage is + rejected. +- Inner entries must be lists: `null` is rejected by the + `list[list[SuppliedDetection]]` type. "No detections" is only expressible + as an explicit `[]`. - Each box: `xyxyn` is exactly 4 floats, each in `[0, 1]` inclusive, with `x_min < x_max` and `y_min < y_max` — same rules and fail-closed rationale as `roi_xyxyn` (zero-area and inverted boxes rejected; catches most @@ -172,9 +178,10 @@ platform-integration time, not in this work. **API** (`api/tests/`): -- Validation matrix: length mismatch with `frames`; coordinate out of - `[0, 1]`; `x_min >= x_max`; `y_min >= y_max`; wrong tuple length; - confidence out of `[0, 1]` → 400 with message. +- Validation matrix: length mismatch with `frames` (both shorter and + longer); `null` inner entry; coordinate out of `[0, 1]`; + `x_min >= x_max`; `y_min >= y_max`; wrong tuple length; confidence out of + `[0, 1]` → 400 with message. - Bypass proof: with `detections` supplied, the model's `detect()` is never called (mock/monkeypatch assertion) and the cache is neither read nor written. 
From 54430a7c3e6fef6c46f5852dcdab27420d820169 Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:34:24 +0200 Subject: [PATCH 03/12] docs: spell out per-detection validation in supplied-detections spec --- ...26-06-11-api-supplied-detections-design.md | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/docs/specs/2026-06-11-api-supplied-detections-design.md b/docs/specs/2026-06-11-api-supplied-detections-design.md index 77c9307..5acf7e9 100644 --- a/docs/specs/2026-06-11-api-supplied-detections-design.md +++ b/docs/specs/2026-06-11-api-supplied-detections-design.md @@ -94,7 +94,13 @@ class SuppliedDetection(BaseModel): @field_validator("xyxyn") @classmethod - def _validate_xyxyn(cls, v): ... # same checks as roi_xyxyn + def _validate_xyxyn(cls, v): + x_min, y_min, x_max, y_max = v + if not all(0.0 <= c <= 1.0 for c in v): + raise ValueError("xyxyn coordinates must be in [0, 1]") + if x_min >= x_max or y_min >= y_max: + raise ValueError("xyxyn requires x_min < x_max and y_min < y_max") + return v class PredictRequest(BaseModel): @@ -104,9 +110,20 @@ class PredictRequest(BaseModel): detections: list[list[SuppliedDetection]] | None = None @model_validator(mode="after") - def _detections_match_frames(self): ... # length check + def _detections_match_frames(self): + if self.detections is not None and len(self.detections) != len(self.frames): + raise ValueError( + "detections must have exactly one entry per frame " + f"(got {len(self.detections)} entries for {len(self.frames)} frames)" + ) + return self ``` +Pydantic's type system handles the rest of the well-formedness for free: +a detection that is not an object, lacks `xyxyn`/`confidence`, has a +non-numeric value, or an `xyxyn` that is not exactly 4 numbers all fail type +validation before the custom validators run. + ### Response - Top-level shape unchanged (`is_smoke`, `probability`, `model`). 
From 7508181ee660d128fbce0e1647db1ff9e282b1af Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:44:03 +0200 Subject: [PATCH 04/12] docs: implementation plan for caller-supplied detections on /predict --- .../2026-06-11-api-supplied-detections.md | 747 ++++++++++++++++++ 1 file changed, 747 insertions(+) create mode 100644 docs/plans/2026-06-11-api-supplied-detections.md diff --git a/docs/plans/2026-06-11-api-supplied-detections.md b/docs/plans/2026-06-11-api-supplied-detections.md new file mode 100644 index 0000000..6759c3c --- /dev/null +++ b/docs/plans/2026-06-11-api-supplied-detections.md @@ -0,0 +1,747 @@ +# Caller-Supplied Detections on `/predict` Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Let `/predict` callers supply per-frame detection boxes (from the RPi edge detector) so the API skips its bundled YOLO pass while tube building, ROI filtering, cropping, and classification run unchanged. + +**Architecture:** A new optional `detections` field on `PredictRequest` (index-aligned list of per-frame box lists, `xyxyn` + `confidence`), validated at the HTTP boundary. `ModelRunner._predict_sync` converts supplied boxes to internal `FrameDetections` and feeds the existing `model.predict(frame_detections=...)` injection seam, skipping `detect()` and the detection cache entirely. Core is untouched. Spec: `docs/specs/2026-06-11-api-supplied-detections-design.md`. + +**Tech Stack:** FastAPI + Pydantic v2 (`api/` package), pytest, `uv` (run tests from `api/`: `uv run pytest tests/ -v`). + +**Branch:** work on `arthur/feat-api-thread-bboxes`. Commit messages: conventional commits, NO co-author trailers. 
+ +--- + +### Task 1: Request schema — `SuppliedDetection` + `detections` field + +**Files:** +- Modify: `api/src/temporal_model/api/schemas.py` (imports at line 11, `PredictRequest` at lines 22–69) +- Test: `api/tests/test_schemas.py` + +- [ ] **Step 1: Write the failing tests** + +Append to `api/tests/test_schemas.py` (existing imports at top already provide `pytest`, `ValidationError`, `PredictRequest`): + +```python +def test_request_detections_default_to_none(): + assert PredictRequest(frames=["a.jpg"]).detections is None + + +def test_request_accepts_per_frame_detections(): + req = PredictRequest( + frames=["a.jpg", "b.jpg"], + detections=[ + [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.7}], + [], + ], + ) + assert req.detections[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) + assert req.detections[0][0].confidence == 0.7 + assert req.detections[1] == [] + + +@pytest.mark.parametrize("entries", [[], [[]], [[], [], []]]) +def test_request_rejects_detections_length_mismatch(entries): + # frames has 2 keys; 0, 1 and 3 detection entries must all fail. + with pytest.raises(ValidationError, match="one entry per frame"): + PredictRequest(frames=["a.jpg", "b.jpg"], detections=entries) + + +def test_request_rejects_null_frame_entry(): + # "no detections" must be an explicit [], never null. 
+ with pytest.raises(ValidationError): + PredictRequest(frames=["a.jpg"], detections=[None]) + + +@pytest.mark.parametrize( + "box", + [ + {"xyxyn": [-0.1, 0.2, 0.3, 0.4], "confidence": 0.5}, # coord < 0 + {"xyxyn": [0.1, 0.2, 0.3, 1.4], "confidence": 0.5}, # coord > 1 + {"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.5}, # x_min >= x_max + {"xyxyn": [0.1, 0.4, 0.3, 0.4], "confidence": 0.5}, # y_min >= y_max + {"xyxyn": [0.1, 0.2, 0.3], "confidence": 0.5}, # too short + {"xyxyn": [0.1, 0.2, 0.3, 0.4, 0.5], "confidence": 0.5}, # too long + {"xyxyn": ["a", 0.2, 0.3, 0.4], "confidence": 0.5}, # non-numeric + {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 1.5}, # confidence > 1 + {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": -0.1}, # confidence < 0 + {"xyxyn": [0.1, 0.2, 0.3, 0.4]}, # missing confidence + {"confidence": 0.5}, # missing xyxyn + "not-an-object", # wrong type entirely + ], +) +def test_request_rejects_malformed_detection(box): + with pytest.raises(ValidationError): + PredictRequest(frames=["a.jpg"], detections=[[box]]) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd api && uv run pytest tests/test_schemas.py -v -k "detection or null_frame_entry"` +Expected: FAIL — `test_request_detections_default_to_none` errors with `AttributeError: 'PredictRequest' object has no attribute 'detections'`; the rejection tests fail because no `ValidationError` is raised (unknown fields are ignored). + +- [ ] **Step 3: Implement the schema** + +In `api/src/temporal_model/api/schemas.py`: + +3a. Extend the pydantic import (line 11): + +```python +from pydantic import ( + BaseModel, + ConfigDict, + Field, + field_validator, + model_validator, +) +``` + +3b. Add `SuppliedDetection` immediately above `class PredictRequest` (after the `_BUCKET_RE` block): + +```python +class SuppliedDetection(BaseModel): + """One caller-supplied detection box (normalized xyxyn corners). + + Geometry rules match ``roi_xyxyn``. 
Checked inline rather than via the + core ``validate_roi`` helper so the error message names the detection + field, not "roi". + """ + + xyxyn: tuple[float, float, float, float] + confidence: float = Field(ge=0.0, le=1.0) + + @field_validator("xyxyn") + @classmethod + def _validate_xyxyn( + cls, v: tuple[float, float, float, float] + ) -> tuple[float, float, float, float]: + x_min, y_min, x_max, y_max = v + if not all(0.0 <= c <= 1.0 for c in v): + raise ValueError("xyxyn coordinates must be in [0, 1]") + if x_min >= x_max or y_min >= y_max: + raise ValueError("xyxyn requires x_min < x_max and y_min < y_max") + return v +``` + +3c. Add the field to `PredictRequest` directly under `roi_xyxyn` (line 33): + +```python + # Optional caller-supplied detections, one list per frame, index-aligned + # with `frames` ([] = that frame's detector saw nothing — never null). + # When set, the bundled YOLO and its cache are bypassed entirely and tubes + # are built from these boxes (see + # docs/specs/2026-06-11-api-supplied-detections-design.md). + detections: list[list[SuppliedDetection]] | None = None +``` + +3d. Add the cross-field length check after `_validate_roi` (line 69): + +```python + @model_validator(mode="after") + def _detections_match_frames(self) -> "PredictRequest": + if self.detections is not None and len(self.detections) != len(self.frames): + raise ValueError( + "detections must have exactly one entry per frame " + f"(got {len(self.detections)} entries for {len(self.frames)} frames)" + ) + return self +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd api && uv run pytest tests/test_schemas.py -v` +Expected: all PASS (new tests and pre-existing ones). 
+ +- [ ] **Step 5: Commit** + +```bash +git add api/src/temporal_model/api/schemas.py api/tests/test_schemas.py +git commit -m "feat(api): accept per-frame supplied detections in PredictRequest" +``` + +--- + +### Task 2: Runner bypass — skip detector and cache when detections are supplied + +**Files:** +- Modify: `api/src/temporal_model/api/model_runner.py` (imports ~line 19, `predict` at lines 120–139, `_predict_sync` at lines 141–177) +- Test: `api/tests/test_model_runner.py` (`_OrchestrationModel` at lines 117–143) + +- [ ] **Step 1: Extend the orchestration fake to record full detections** + +In `api/tests/test_model_runner.py`, update `_OrchestrationModel` so `detect()` returns a real box (clean binary-fraction floats so dataclass equality is exact) and `predict()` records the full `frame_detections` dict: + +```python +class _OrchestrationModel: + """Fake core model recording how detection is invoked across calls.""" + + def __init__(self): + self.detect_calls: list[list[str]] = [] + self.predict_calls: list[set[str]] = [] + self.roi_calls: list[tuple | None] = [] + self.frame_detections_calls: list[dict] = [] + + def load_sequence(self, paths): + return [ + Frame(frame_id=Path(p).stem, image_path=Path(p), timestamp=None) + for p in paths + ] + + def detect(self, frames): + self.detect_calls.append([f.frame_id for f in frames]) + return [ + FrameDetections( + frame_idx=i, + frame_id=f.frame_id, + timestamp=None, + detections=[ + Detection( + class_id=0, cx=0.5, cy=0.5, w=0.5, h=0.5, confidence=0.75 + ) + ], + ) + for i, f in enumerate(frames) + ] + + def predict(self, frames, *, frame_detections=None, roi=None, timer=None): + self.predict_calls.append(set(frame_detections or {})) + self.roi_calls.append(roi) + self.frame_detections_calls.append(frame_detections or {}) + return SimpleNamespace(frame_ids=[f.frame_id for f in frames]) +``` + +`Detection` needs importing at the top of the file alongside the existing `Frame`/`FrameDetections` import (check the 
import block at the top; it already imports from `temporal_model.core.types`): + +```python +from temporal_model.core.types import Detection, Frame, FrameDetections +``` + +Existing tests only inspect `detect_calls` ids and `predict_calls` key sets, so the richer `detect()` return changes nothing for them. + +- [ ] **Step 2: Write the failing tests** + +Append to `api/tests/test_model_runner.py`. Add `pytest` and `SuppliedDetection` imports at the top if missing: + +```python +import pytest + +from temporal_model.api.schemas import SuppliedDetection +``` + +Tests: + +```python +def _supplied_box(): + return SuppliedDetection(xyxyn=(0.1, 0.2, 0.5, 0.8), confidence=0.7) + + +def test_predict_with_supplied_detections_skips_detect(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run( + runner.predict( + ["c/x_00.jpg", "c/x_01.jpg"], detections=[[_supplied_box()], []] + ) + ) + + assert model.detect_calls == [] + assert model.predict_calls[-1] == {"x_00", "x_01"} + + +def test_predict_supplied_detections_converted_to_xywhn(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + + fd = model.frame_detections_calls[-1]["x_00"] + assert fd.frame_idx == 0 + [det] = fd.detections + assert (det.cx, det.cy, det.w, det.h) == pytest.approx((0.3, 0.5, 0.4, 0.6)) + assert det.confidence == 0.7 + assert det.class_id == 0 + + +def test_predict_supplied_empty_frame_has_no_detections(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[]])) + + assert model.frame_detections_calls[-1]["x_00"].detections == [] + + +def test_predict_supplied_detections_do_not_enter_cache(): + model = _OrchestrationModel() + runner = ModelRunner( + model, name="m", version="1", calibrated=True, 
detection_cache_size=4096 + ) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + asyncio.run(runner.predict(["c/x_00.jpg"])) + + # The supplied run wrote nothing: the plain run must re-detect the frame. + assert model.detect_calls == [["x_00"]] + + +def test_predict_supplied_detections_ignore_cached_entries(): + model = _OrchestrationModel() + runner = ModelRunner( + model, name="m", version="1", calibrated=True, detection_cache_size=4096 + ) + asyncio.run(runner.predict(["c/x_00.jpg"])) # warms cache (confidence 0.75) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + + # The supplied run used the supplied box, not the cached detector output. + [det] = model.frame_detections_calls[-1]["x_00"].detections + assert det.confidence == 0.7 + + +def test_predict_supplied_detections_profile_counters(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + profile: dict = {} + asyncio.run( + runner.predict( + ["c/x_00.jpg"], detections=[[_supplied_box()]], profile=profile + ) + ) + + assert profile == {"n_frames": 1, "cache_hits": 0, "cache_misses": 0} + + +def test_predict_supplied_detections_threads_roi(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run( + runner.predict( + ["c/x_00.jpg"], detections=[[_supplied_box()]], roi=(0.1, 0.2, 0.3, 0.4) + ) + ) + + assert model.roi_calls[-1] == (0.1, 0.2, 0.3, 0.4) + + +def test_predict_supplied_matches_detector_path(): + # Supplying the exact box the detector would produce (xywhn 0.5/0.5/0.5/0.5 + # == xyxyn 0.25..0.75) hands the model identical FrameDetections. 
+ model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + paths = ["c/x_00.jpg", "c/x_01.jpg"] + + asyncio.run(runner.predict(paths)) + detector_fds = model.frame_detections_calls[-1] + + equivalent = SuppliedDetection(xyxyn=(0.25, 0.25, 0.75, 0.75), confidence=0.75) + asyncio.run(runner.predict(paths, detections=[[equivalent], [equivalent]])) + supplied_fds = model.frame_detections_calls[-1] + + assert supplied_fds == detector_fds +``` + +- [ ] **Step 3: Run tests to verify they fail** + +Run: `cd api && uv run pytest tests/test_model_runner.py -v -k supplied` +Expected: FAIL — `TypeError: ModelRunner.predict() got an unexpected keyword argument 'detections'`. + +- [ ] **Step 4: Implement the runner bypass** + +In `api/src/temporal_model/api/model_runner.py`: + +4a. Add imports at the top (after the existing `from temporal_model.core.stage_timer import ...` line): + +```python +from temporal_model.core.types import Detection, FrameDetections + +from .detection_cache import DetectionCache +from .schemas import SuppliedDetection +``` + +(`.detection_cache` is already imported; only add the other two lines, keeping import order ruff-clean.) + +4b. Add a module-level helper above `class ModelRunner`: + +```python +def _supplied_frame_detections( + frames: list[Any], detections: list[list[SuppliedDetection]] +) -> dict[str, FrameDetections]: + """Convert caller-supplied xyxyn boxes to per-frame ``FrameDetections``. + + ``detections`` is index-aligned with ``frames`` (lengths validated at the + HTTP boundary; ``strict=True`` is a safety net). Boxes arrive as + normalized corners and become center-based xywhn ``Detection``s; supplied + boxes are smoke by definition (``class_id=0``). 
+ """ + resolved: dict[str, FrameDetections] = {} + for idx, (frame, boxes) in enumerate(zip(frames, detections, strict=True)): + resolved[frame.frame_id] = FrameDetections( + frame_idx=idx, + frame_id=frame.frame_id, + timestamp=frame.timestamp, + detections=[ + Detection( + class_id=0, + cx=(b.xyxyn[0] + b.xyxyn[2]) / 2.0, + cy=(b.xyxyn[1] + b.xyxyn[3]) / 2.0, + w=b.xyxyn[2] - b.xyxyn[0], + h=b.xyxyn[3] - b.xyxyn[1], + confidence=b.confidence, + ) + for b in boxes + ], + ) + return resolved +``` + +4c. Thread the parameter through `predict` (signature + docstring + threadpool call): + +```python + async def predict( + self, + frame_paths: list[Path], + *, + roi: tuple[float, float, float, float] | None = None, + detections: list[list[SuppliedDetection]] | None = None, + timer: StageTimer | None = None, + profile: dict[str, Any] | None = None, + ) -> Any: + """Resolve detections (cache + detect misses) then run the model. + + The whole orchestration runs in a worker thread under the lock, so the + cache is accessed by one prediction at a time. When ``timer``/``profile`` + are supplied, the ``detector`` stage is timed and cache counts recorded. + ``roi`` is passed through to the core model untouched — the cache stays + full-frame (see the invariant in the ROI spec). When ``detections`` is + supplied (index-aligned per-frame boxes from the caller's own + detector), the bundled detector and its cache are bypassed entirely: + no read, no write, no ``detector`` stage. + """ + async with self._lock: + return await run_in_threadpool( + self._predict_sync, frame_paths, roi, detections, timer, profile + ) +``` + +4d. 
Branch in `_predict_sync` right after `load_sequence`: + +```python + def _predict_sync( + self, + frame_paths: list[Path], + roi: tuple[float, float, float, float] | None = None, + detections: list[list[SuppliedDetection]] | None = None, + timer: StageTimer | None = None, + profile: dict[str, Any] | None = None, + ) -> Any: + started = time.perf_counter() + frames = self._model.load_sequence(frame_paths) + if detections is not None: + out = self._model.predict( + frames, + frame_detections=_supplied_frame_detections(frames, detections), + roi=roi, + timer=timer, + ) + if profile is not None: + profile["n_frames"] = len(frames) + profile["cache_hits"] = 0 + profile["cache_misses"] = 0 + logger.info( + "predict: supplied detections, seq_len=%d, %.0fms", + len(frames), + (time.perf_counter() - started) * 1000.0, + ) + return out + resolved: dict[str, Any] = {} + ... # existing detector-path body continues unchanged from here +``` + +(The `...` is the existing code from `resolved: dict[str, Any] = {}` onward — do not modify it.) + +- [ ] **Step 5: Run tests to verify they pass** + +Run: `cd api && uv run pytest tests/test_model_runner.py -v` +Expected: all PASS (new and pre-existing). + +- [ ] **Step 6: Commit** + +```bash +git add api/src/temporal_model/api/model_runner.py api/tests/test_model_runner.py +git commit -m "feat(api): bypass detector and cache when detections are supplied" +``` + +--- + +### Task 3: App threading + `detections_source` provenance in verbose details + +**Files:** +- Modify: `api/src/temporal_model/api/app.py` (predict handler, lines 146–169) +- Modify: `api/src/temporal_model/api/schemas.py` (`Preprocessing` ~line 100, `_to_details` ~line 142, `to_response` ~line 171) +- Test: `api/tests/test_app.py` (`FakeRunner` at lines 58–79), `api/tests/test_schemas.py` + +- [ ] **Step 1: Write the failing tests** + +1a. 
Append to `api/tests/test_schemas.py`: + +```python +def test_verbose_details_detections_source_request(): + out = SimpleNamespace( + is_positive=False, trigger_frame_index=None, details=_details([]) + ) + resp = to_response( + out, + name="m", + version="1", + calibrated=True, + verbose=True, + detections_source="request", + ) + assert resp.details.preprocessing.detections_source == "request" + + +def test_verbose_details_detections_source_defaults_to_detector(): + out = SimpleNamespace( + is_positive=False, trigger_frame_index=None, details=_details([]) + ) + resp = to_response(out, name="m", version="1", calibrated=True, verbose=True) + assert resp.details.preprocessing.detections_source == "detector" +``` + +1b. In `api/tests/test_app.py`, update `FakeRunner` to accept and record the new kwarg: + +```python + def __init__(self, output=None, error=None): + self._output = output + self._error = error + self.roi = None + self.detections = None + + async def predict( + self, paths, *, roi=None, detections=None, timer=None, profile=None + ): + self.roi = roi + self.detections = detections + if self._error: + raise self._error + if timer is not None: + with timer.stage("detector"): + pass + if profile is not None: + profile.update(n_frames=len(paths), cache_hits=0, cache_misses=len(paths)) + return self._output +``` + +1c. 
Append endpoint tests to `api/tests/test_app.py` (`KEYS` has exactly 2 frames): + +```python +def test_predict_passes_detections_to_runner(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [ + [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.6}], + [], + ], + }, + ) + assert r.status_code == 200 + sent = client.app.state.runner.detections + assert sent[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) + assert sent[0][0].confidence == 0.6 + assert sent[1] == [] + + +def test_predict_without_detections_passes_none(client): + r = client.post("/predict", json={"frames": KEYS}) + assert r.status_code == 200 + assert client.app.state.runner.detections is None + + +def test_predict_detections_length_mismatch_is_400(client): + r = client.post("/predict", json={"frames": KEYS, "detections": [[]]}) + assert r.status_code == 400 + body = r.json() + assert body["code"] == "invalid_request" + assert "one entry per frame" in body["detail"] + + +def test_predict_malformed_detection_is_400(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [ + [{"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.6}], + [], + ], + }, + ) + assert r.status_code == 400 + assert r.json()["code"] == "invalid_request" + + +def test_predict_detections_compose_with_roi(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [[], []], + "roi_xyxyn": [0.0, 0.0, 1.0, 1.0], + }, + ) + assert r.status_code == 200 + assert client.app.state.runner.roi == (0.0, 0.0, 1.0, 1.0) + assert client.app.state.runner.detections == [[], []] + + +def test_predict_verbose_detections_source_request(client): + r = client.post( + "/predict?verbose=true", json={"frames": KEYS, "detections": [[], []]} + ) + assert r.status_code == 200 + assert r.json()["details"]["preprocessing"]["detections_source"] == "request" + + +def test_predict_verbose_detections_source_detector(client): + r = client.post("/predict?verbose=true", json={"frames": KEYS}) + 
assert r.status_code == 200 + assert r.json()["details"]["preprocessing"]["detections_source"] == "detector" +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd api && uv run pytest tests/test_app.py tests/test_schemas.py -v -k "detections_source or detections"` +Expected: FAIL — `to_response() got an unexpected keyword argument 'detections_source'`; endpoint tests fail with `detections_source` missing from the verbose payload and `FakeRunner.detections` never set by the app. + +- [ ] **Step 3: Implement** + +3a. In `api/src/temporal_model/api/schemas.py`, add the field to `Preprocessing`: + +```python +class Preprocessing(BaseModel): + num_frames_input: int + num_truncated: int + padded_frame_indices: list[int] + num_tube_candidates: int + num_tubes_outside_roi: int + # Provenance: "request" when the caller supplied the detections (bundled + # detector bypassed), "detector" when the bundled YOLO produced them. + detections_source: Literal["request", "detector"] +``` + +3b. Thread it through `_to_details` (add parameter, pass into `Preprocessing(...)`): + +```python +def _to_details( + details: dict[str, Any], + *, + threshold_overridden: bool, + packaged_threshold: float | None, + detections_source: Literal["request", "detector"], + profiling: dict[str, Any] | None = None, +) -> Details: +``` + +and inside the `Preprocessing(` call add `detections_source=detections_source,`. + +3c. Thread it through `to_response` (default keeps every existing caller working): + +```python +def to_response( + out: Any, + *, + name: str, + version: str | None, + calibrated: bool, + verbose: bool, + threshold_overridden: bool = False, + packaged_threshold: float | None = None, + detections_source: Literal["request", "detector"] = "detector", + profiling: dict[str, Any] | None = None, +) -> PredictResponse: +``` + +and in the `verbose` branch pass `detections_source=detections_source,` to `_to_details`. + +3d. 
In `api/src/temporal_model/api/app.py`, update the two call sites in `predict`: + +```python + out = await runner.predict( + paths, + roi=body.roi_xyxyn, + detections=body.detections, + timer=timer, + profile=profile, + ) +``` + +```python + return to_response( + out, + name=runner.name, + version=runner.version, + calibrated=runner.calibrated, + verbose=verbose, + threshold_overridden=runner.threshold_overridden, + packaged_threshold=runner.packaged_threshold, + detections_source=( + "request" if body.detections is not None else "detector" + ), + profiling=profiling, + ) +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd api && uv run pytest tests/test_app.py tests/test_schemas.py -v` +Expected: all PASS. + +- [ ] **Step 5: Commit** + +```bash +git add api/src/temporal_model/api/app.py api/src/temporal_model/api/schemas.py api/tests/test_app.py api/tests/test_schemas.py +git commit -m "feat(api): thread supplied detections through /predict with provenance" +``` + +--- + +### Task 4: README, lint, full suite + +**Files:** +- Modify: `api/README.md` (the `POST /predict` bullet, lines 11–16) + +- [ ] **Step 1: Update the endpoint documentation** + +Replace the `POST /predict` bullet in `api/README.md` with: + +```markdown +- `POST /predict` — body `{ "frames": ["", ...], "bucket": "", + "roi_xyxyn": [x_min, y_min, x_max, y_max], + "detections": [[{"xyxyn": [...], "confidence": 0.6}], []] }` + (ordered S3 keys; `bucket` optional, falls back to `S3_BUCKET`; + `roi_xyxyn` optional normalized region of interest — tubes with no real + detection intersecting it are dropped before scoring; + `detections` optional caller-supplied boxes, one list per frame + index-aligned with `frames`, `[]` = that frame's detector saw nothing — + skips the bundled YOLO and its cache entirely, tubes are built from the + supplied boxes); + returns `{ is_smoke, probability, model }` (`probability` = max kept-tube + calibrated probability, `null` if uncalibrated). 
+ `POST /predict?verbose=true` adds a `details` block (decision, preprocessing, + per-tube tracks). +``` + +- [ ] **Step 2: Lint and run the full API suite** + +Run: `make -C api lint && make -C api test` +Expected: lint clean, all tests PASS. + +- [ ] **Step 3: Run the core suite (regression — core is meant to be untouched)** + +Run: `git status --short core/` (expect no modifications) and `make -C core test` +Expected: no core diffs; all core tests PASS. + +- [ ] **Step 4: Commit** + +```bash +git add api/README.md +git commit -m "docs(api): document the detections field on /predict" +``` From 9d7430c69453804d5c62f35341d4be161d766d2e Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:46:27 +0200 Subject: [PATCH 05/12] feat(api): accept per-frame supplied detections in PredictRequest --- api/src/temporal_model/api/schemas.py | 47 +++++++++++++++++++++++- api/tests/test_schemas.py | 52 +++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/api/src/temporal_model/api/schemas.py b/api/src/temporal_model/api/schemas.py index dd2a124..73ad64e 100644 --- a/api/src/temporal_model/api/schemas.py +++ b/api/src/temporal_model/api/schemas.py @@ -8,7 +8,13 @@ import re from typing import Any, Literal -from pydantic import BaseModel, ConfigDict, Field, field_validator +from pydantic import ( + BaseModel, + ConfigDict, + Field, + field_validator, + model_validator, +) from temporal_model.core.tubes import validate_roi @@ -19,6 +25,30 @@ _BUCKET_RE = re.compile(r"^[a-z0-9][a-z0-9.-]{1,61}[a-z0-9]$") +class SuppliedDetection(BaseModel): + """One caller-supplied detection box (normalized xyxyn corners). + + Geometry rules match ``roi_xyxyn``. Checked inline rather than via the + core ``validate_roi`` helper so the error message names the detection + field, not "roi". 
+ """ + + xyxyn: tuple[float, float, float, float] + confidence: float = Field(ge=0.0, le=1.0) + + @field_validator("xyxyn") + @classmethod + def _validate_xyxyn( + cls, v: tuple[float, float, float, float] + ) -> tuple[float, float, float, float]: + x_min, y_min, x_max, y_max = v + if not all(0.0 <= c <= 1.0 for c in v): + raise ValueError("xyxyn coordinates must be in [0, 1]") + if x_min >= x_max or y_min >= y_max: + raise ValueError("xyxyn requires x_min < x_max and y_min < y_max") + return v + + class PredictRequest(BaseModel): frames: list[str] # Optional per-request S3 bucket. Falls back to settings.s3_bucket when @@ -31,6 +61,12 @@ class PredictRequest(BaseModel): # detection intersecting it are dropped before scoring (see # docs/specs/2026-06-10-api-roi-design.md). roi_xyxyn: tuple[float, float, float, float] | None = None + # Optional caller-supplied detections, one list per frame, index-aligned + # with `frames` ([] = that frame's detector saw nothing — never null). + # When set, the bundled YOLO and its cache are bypassed entirely and tubes + # are built from these boxes (see + # docs/specs/2026-06-11-api-supplied-detections-design.md). 
+ detections: list[list[SuppliedDetection]] | None = None @field_validator("frames") @classmethod @@ -68,6 +104,15 @@ def _validate_roi( raise ValueError(f"roi_xyxyn: {e}") from e return v + @model_validator(mode="after") + def _detections_match_frames(self) -> "PredictRequest": + if self.detections is not None and len(self.detections) != len(self.frames): + raise ValueError( + "detections must have exactly one entry per frame " + f"(got {len(self.detections)} entries for {len(self.frames)} frames)" + ) + return self + class FrameEntry(BaseModel): frame_idx: int diff --git a/api/tests/test_schemas.py b/api/tests/test_schemas.py index 990310c..508eec7 100644 --- a/api/tests/test_schemas.py +++ b/api/tests/test_schemas.py @@ -243,3 +243,55 @@ def test_verbose_details_num_tubes_outside_roi_is_strict(): out = SimpleNamespace(is_positive=True, trigger_frame_index=3, details=details) with pytest.raises(KeyError): to_response(out, name="m", version="1", calibrated=True, verbose=True) + + +def test_request_detections_default_to_none(): + assert PredictRequest(frames=["a.jpg"]).detections is None + + +def test_request_accepts_per_frame_detections(): + req = PredictRequest( + frames=["a.jpg", "b.jpg"], + detections=[ + [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.7}], + [], + ], + ) + assert req.detections[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) + assert req.detections[0][0].confidence == 0.7 + assert req.detections[1] == [] + + +@pytest.mark.parametrize("entries", [[], [[]], [[], [], []]]) +def test_request_rejects_detections_length_mismatch(entries): + # frames has 2 keys; 0, 1 and 3 detection entries must all fail. + with pytest.raises(ValidationError, match="one entry per frame"): + PredictRequest(frames=["a.jpg", "b.jpg"], detections=entries) + + +def test_request_rejects_null_frame_entry(): + # "no detections" must be an explicit [], never null. 
+ with pytest.raises(ValidationError): + PredictRequest(frames=["a.jpg"], detections=[None]) + + +@pytest.mark.parametrize( + "box", + [ + {"xyxyn": [-0.1, 0.2, 0.3, 0.4], "confidence": 0.5}, # coord < 0 + {"xyxyn": [0.1, 0.2, 0.3, 1.4], "confidence": 0.5}, # coord > 1 + {"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.5}, # x_min >= x_max + {"xyxyn": [0.1, 0.4, 0.3, 0.4], "confidence": 0.5}, # y_min >= y_max + {"xyxyn": [0.1, 0.2, 0.3], "confidence": 0.5}, # too short + {"xyxyn": [0.1, 0.2, 0.3, 0.4, 0.5], "confidence": 0.5}, # too long + {"xyxyn": ["a", 0.2, 0.3, 0.4], "confidence": 0.5}, # non-numeric + {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 1.5}, # confidence > 1 + {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": -0.1}, # confidence < 0 + {"xyxyn": [0.1, 0.2, 0.3, 0.4]}, # missing confidence + {"confidence": 0.5}, # missing xyxyn + "not-an-object", # wrong type entirely + ], +) +def test_request_rejects_malformed_detection(box): + with pytest.raises(ValidationError): + PredictRequest(frames=["a.jpg"], detections=[[box]]) From b7fe2e7814a54d5db34b43e238b72546c88ac814 Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:48:13 +0200 Subject: [PATCH 06/12] feat(api): bypass detector and cache when detections are supplied --- api/src/temporal_model/api/model_runner.py | 59 +++++++++- api/tests/test_model_runner.py | 120 ++++++++++++++++++++- 2 files changed, 175 insertions(+), 4 deletions(-) diff --git a/api/src/temporal_model/api/model_runner.py b/api/src/temporal_model/api/model_runner.py index d2ba875..27ca7ec 100644 --- a/api/src/temporal_model/api/model_runner.py +++ b/api/src/temporal_model/api/model_runner.py @@ -17,8 +17,10 @@ from starlette.concurrency import run_in_threadpool from temporal_model.core.stage_timer import StageTimer, stage_ctx +from temporal_model.core.types import Detection, FrameDetections from .detection_cache import DetectionCache +from .schemas import SuppliedDetection logger = logging.getLogger(__name__) @@ -45,6 +47,37 
@@ def _load_core_model(package_path: Path, device: str | None) -> Any: return BboxTubeTemporalModel.from_package(package_path, device=device) +def _supplied_frame_detections( + frames: list[Any], detections: list[list[SuppliedDetection]] +) -> dict[str, FrameDetections]: + """Convert caller-supplied xyxyn boxes to per-frame ``FrameDetections``. + + ``detections`` is index-aligned with ``frames`` (lengths validated at the + HTTP boundary; ``strict=True`` is a safety net). Boxes arrive as + normalized corners and become center-based xywhn ``Detection``s; supplied + boxes are smoke by definition (``class_id=0``). + """ + resolved: dict[str, FrameDetections] = {} + for idx, (frame, boxes) in enumerate(zip(frames, detections, strict=True)): + resolved[frame.frame_id] = FrameDetections( + frame_idx=idx, + frame_id=frame.frame_id, + timestamp=frame.timestamp, + detections=[ + Detection( + class_id=0, + cx=(b.xyxyn[0] + b.xyxyn[2]) / 2.0, + cy=(b.xyxyn[1] + b.xyxyn[3]) / 2.0, + w=b.xyxyn[2] - b.xyxyn[0], + h=b.xyxyn[3] - b.xyxyn[1], + confidence=b.confidence, + ) + for b in boxes + ], + ) + return resolved + + class ModelRunner: """Holds the loaded model and serializes inference calls.""" @@ -122,6 +155,7 @@ async def predict( frame_paths: list[Path], *, roi: tuple[float, float, float, float] | None = None, + detections: list[list[SuppliedDetection]] | None = None, timer: StageTimer | None = None, profile: dict[str, Any] | None = None, ) -> Any: @@ -131,22 +165,43 @@ async def predict( cache is accessed by one prediction at a time. When ``timer``/``profile`` are supplied, the ``detector`` stage is timed and cache counts recorded. ``roi`` is passed through to the core model untouched — the cache stays - full-frame (see the invariant in the ROI spec). + full-frame (see the invariant in the ROI spec). 
When ``detections`` is + supplied (index-aligned per-frame boxes from the caller's own + detector), the bundled detector and its cache are bypassed entirely: + no read, no write, no ``detector`` stage. """ async with self._lock: return await run_in_threadpool( - self._predict_sync, frame_paths, roi, timer, profile + self._predict_sync, frame_paths, roi, detections, timer, profile ) def _predict_sync( self, frame_paths: list[Path], roi: tuple[float, float, float, float] | None = None, + detections: list[list[SuppliedDetection]] | None = None, timer: StageTimer | None = None, profile: dict[str, Any] | None = None, ) -> Any: started = time.perf_counter() frames = self._model.load_sequence(frame_paths) + if detections is not None: + out = self._model.predict( + frames, + frame_detections=_supplied_frame_detections(frames, detections), + roi=roi, + timer=timer, + ) + if profile is not None: + profile["n_frames"] = len(frames) + profile["cache_hits"] = 0 + profile["cache_misses"] = 0 + logger.info( + "predict: supplied detections, seq_len=%d, %.0fms", + len(frames), + (time.perf_counter() - started) * 1000.0, + ) + return out resolved: dict[str, Any] = {} misses = [] for f in frames: diff --git a/api/tests/test_model_runner.py b/api/tests/test_model_runner.py index a30f036..43d4647 100644 --- a/api/tests/test_model_runner.py +++ b/api/tests/test_model_runner.py @@ -3,13 +3,15 @@ from pathlib import Path from types import SimpleNamespace +import pytest import yaml from temporal_model.api import model_runner as mr from temporal_model.api.model_runner import ModelRunner, read_manifest +from temporal_model.api.schemas import SuppliedDetection from temporal_model.core.protocol import Frame from temporal_model.core.stage_timer import StageTimer -from temporal_model.core.types import FrameDetections +from temporal_model.core.types import Detection, FrameDetections def _make_package(tmp_path, manifest: dict): @@ -121,6 +123,7 @@ def __init__(self): self.detect_calls: 
list[list[str]] = [] self.predict_calls: list[set[str]] = [] self.roi_calls: list[tuple | None] = [] + self.frame_detections_calls: list[dict] = [] def load_sequence(self, paths): return [ @@ -132,7 +135,14 @@ def detect(self, frames): self.detect_calls.append([f.frame_id for f in frames]) return [ FrameDetections( - frame_idx=i, frame_id=f.frame_id, timestamp=None, detections=[] + frame_idx=i, + frame_id=f.frame_id, + timestamp=None, + detections=[ + Detection( + class_id=0, cx=0.5, cy=0.5, w=0.5, h=0.5, confidence=0.75 + ) + ], ) for i, f in enumerate(frames) ] @@ -140,6 +150,7 @@ def detect(self, frames): def predict(self, frames, *, frame_detections=None, roi=None, timer=None): self.predict_calls.append(set(frame_detections or {})) self.roi_calls.append(roi) + self.frame_detections_calls.append(frame_detections or {}) return SimpleNamespace(frame_ids=[f.frame_id for f in frames]) @@ -191,6 +202,111 @@ def test_predict_cache_disabled_detects_every_frame(): assert model.detect_calls[1] == ["x_00", "x_01", "x_02"] # full each call +def _supplied_box(): + return SuppliedDetection(xyxyn=(0.1, 0.2, 0.5, 0.8), confidence=0.7) + + +def test_predict_with_supplied_detections_skips_detect(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run( + runner.predict( + ["c/x_00.jpg", "c/x_01.jpg"], detections=[[_supplied_box()], []] + ) + ) + + assert model.detect_calls == [] + assert model.predict_calls[-1] == {"x_00", "x_01"} + + +def test_predict_supplied_detections_converted_to_xywhn(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + + fd = model.frame_detections_calls[-1]["x_00"] + assert fd.frame_idx == 0 + [det] = fd.detections + assert (det.cx, det.cy, det.w, det.h) == pytest.approx((0.3, 0.5, 0.4, 0.6)) + assert det.confidence == 0.7 + assert det.class_id == 0 + + +def 
test_predict_supplied_empty_frame_has_no_detections(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[]])) + + assert model.frame_detections_calls[-1]["x_00"].detections == [] + + +def test_predict_supplied_detections_do_not_enter_cache(): + model = _OrchestrationModel() + runner = ModelRunner( + model, name="m", version="1", calibrated=True, detection_cache_size=4096 + ) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + asyncio.run(runner.predict(["c/x_00.jpg"])) + + # The supplied run wrote nothing: the plain run must re-detect the frame. + assert model.detect_calls == [["x_00"]] + + +def test_predict_supplied_detections_ignore_cached_entries(): + model = _OrchestrationModel() + runner = ModelRunner( + model, name="m", version="1", calibrated=True, detection_cache_size=4096 + ) + asyncio.run(runner.predict(["c/x_00.jpg"])) # warms cache (confidence 0.75) + asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) + + # The supplied run used the supplied box, not the cached detector output. 
+ [det] = model.frame_detections_calls[-1]["x_00"].detections + assert det.confidence == 0.7 + + +def test_predict_supplied_detections_profile_counters(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + profile: dict = {} + asyncio.run( + runner.predict( + ["c/x_00.jpg"], detections=[[_supplied_box()]], profile=profile + ) + ) + + assert profile == {"n_frames": 1, "cache_hits": 0, "cache_misses": 0} + + +def test_predict_supplied_detections_threads_roi(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + asyncio.run( + runner.predict( + ["c/x_00.jpg"], detections=[[_supplied_box()]], roi=(0.1, 0.2, 0.3, 0.4) + ) + ) + + assert model.roi_calls[-1] == (0.1, 0.2, 0.3, 0.4) + + +def test_predict_supplied_matches_detector_path(): + # Supplying the exact box the detector would produce (xywhn 0.5/0.5/0.5/0.5 + # == xyxyn 0.25..0.75) hands the model identical FrameDetections. + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + paths = ["c/x_00.jpg", "c/x_01.jpg"] + + asyncio.run(runner.predict(paths)) + detector_fds = model.frame_detections_calls[-1] + + equivalent = SuppliedDetection(xyxyn=(0.25, 0.25, 0.75, 0.75), confidence=0.75) + asyncio.run(runner.predict(paths, detections=[[equivalent], [equivalent]])) + supplied_fds = model.frame_detections_calls[-1] + + assert supplied_fds == detector_fds + + class _StubFrame: def __init__(self, fid): self.frame_id = fid From 04ab49572d78f795b2b43881d17f08da49d5821a Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:49:58 +0200 Subject: [PATCH 07/12] feat(api): thread supplied detections through /predict with provenance --- api/src/temporal_model/api/app.py | 9 ++- api/src/temporal_model/api/schemas.py | 7 +++ api/tests/test_app.py | 81 ++++++++++++++++++++++++++- api/tests/test_schemas.py | 23 ++++++++ 4 files changed, 118 insertions(+), 2 deletions(-) diff 
--git a/api/src/temporal_model/api/app.py b/api/src/temporal_model/api/app.py index 1f5d823..3cd5f87 100644 --- a/api/src/temporal_model/api/app.py +++ b/api/src/temporal_model/api/app.py @@ -144,7 +144,11 @@ async def predict( ) out = await runner.predict( - paths, roi=body.roi_xyxyn, timer=timer, profile=profile + paths, + roi=body.roi_xyxyn, + detections=body.detections, + timer=timer, + profile=profile, ) profiling = None @@ -165,6 +169,9 @@ async def predict( verbose=verbose, threshold_overridden=runner.threshold_overridden, packaged_threshold=runner.packaged_threshold, + detections_source=( + "request" if body.detections is not None else "detector" + ), profiling=profiling, ) except ApiError: diff --git a/api/src/temporal_model/api/schemas.py b/api/src/temporal_model/api/schemas.py index 73ad64e..d2a21d3 100644 --- a/api/src/temporal_model/api/schemas.py +++ b/api/src/temporal_model/api/schemas.py @@ -148,6 +148,9 @@ class Preprocessing(BaseModel): padded_frame_indices: list[int] num_tube_candidates: int num_tubes_outside_roi: int + # Provenance: "request" when the caller supplied the detections (bundled + # detector bypassed), "detector" when the bundled YOLO produced them. + detections_source: Literal["request", "detector"] class Details(BaseModel): @@ -189,6 +192,7 @@ def _to_details( *, threshold_overridden: bool, packaged_threshold: float | None, + detections_source: Literal["request", "detector"], profiling: dict[str, Any] | None = None, ) -> Details: tubes_block = details["tubes"] @@ -207,6 +211,7 @@ def _to_details( # Strict like num_candidates: core (same-commit path dependency) # always emits the key; a silent 0 here would mask a core rename. 
num_tubes_outside_roi=tubes_block["num_outside_roi"], + detections_source=detections_source, ), tubes=[Tube(**t) for t in tubes_block["kept"]], profiling=profiling, @@ -222,6 +227,7 @@ def to_response( verbose: bool, threshold_overridden: bool = False, packaged_threshold: float | None = None, + detections_source: Literal["request", "detector"] = "detector", profiling: dict[str, Any] | None = None, ) -> PredictResponse: """Reshape a core model output into the public response DTO.""" @@ -235,6 +241,7 @@ def to_response( out.details, threshold_overridden=threshold_overridden, packaged_threshold=packaged_threshold, + detections_source=detections_source, profiling=profiling, ) return PredictResponse(**kwargs) diff --git a/api/tests/test_app.py b/api/tests/test_app.py index bcf0878..1138627 100644 --- a/api/tests/test_app.py +++ b/api/tests/test_app.py @@ -66,9 +66,13 @@ def __init__(self, output=None, error=None): self._output = output self._error = error self.roi = None + self.detections = None - async def predict(self, paths, *, roi=None, timer=None, profile=None): + async def predict( + self, paths, *, roi=None, detections=None, timer=None, profile=None + ): self.roi = roi + self.detections = detections if self._error: raise self._error if timer is not None: @@ -412,3 +416,78 @@ def test_predict_invalid_roi_is_400(client): body = r.json() assert body["code"] == "invalid_request" assert "roi_xyxyn" in body["detail"] + + +def test_predict_passes_detections_to_runner(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [ + [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.6}], + [], + ], + }, + ) + assert r.status_code == 200 + sent = client.app.state.runner.detections + assert sent[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) + assert sent[0][0].confidence == 0.6 + assert sent[1] == [] + + +def test_predict_without_detections_passes_none(client): + r = client.post("/predict", json={"frames": KEYS}) + assert r.status_code == 200 + assert 
client.app.state.runner.detections is None + + +def test_predict_detections_length_mismatch_is_400(client): + r = client.post("/predict", json={"frames": KEYS, "detections": [[]]}) + assert r.status_code == 400 + body = r.json() + assert body["code"] == "invalid_request" + assert "one entry per frame" in body["detail"] + + +def test_predict_malformed_detection_is_400(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [ + [{"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.6}], + [], + ], + }, + ) + assert r.status_code == 400 + assert r.json()["code"] == "invalid_request" + + +def test_predict_detections_compose_with_roi(client): + r = client.post( + "/predict", + json={ + "frames": KEYS, + "detections": [[], []], + "roi_xyxyn": [0.0, 0.0, 1.0, 1.0], + }, + ) + assert r.status_code == 200 + assert client.app.state.runner.roi == (0.0, 0.0, 1.0, 1.0) + assert client.app.state.runner.detections == [[], []] + + +def test_predict_verbose_detections_source_request(client): + r = client.post( + "/predict?verbose=true", json={"frames": KEYS, "detections": [[], []]} + ) + assert r.status_code == 200 + assert r.json()["details"]["preprocessing"]["detections_source"] == "request" + + +def test_predict_verbose_detections_source_detector(client): + r = client.post("/predict?verbose=true", json={"frames": KEYS}) + assert r.status_code == 200 + assert r.json()["details"]["preprocessing"]["detections_source"] == "detector" diff --git a/api/tests/test_schemas.py b/api/tests/test_schemas.py index 508eec7..8484f2e 100644 --- a/api/tests/test_schemas.py +++ b/api/tests/test_schemas.py @@ -245,6 +245,29 @@ def test_verbose_details_num_tubes_outside_roi_is_strict(): to_response(out, name="m", version="1", calibrated=True, verbose=True) +def test_verbose_details_detections_source_request(): + out = SimpleNamespace( + is_positive=False, trigger_frame_index=None, details=_details([]) + ) + resp = to_response( + out, + name="m", + version="1", + 
calibrated=True, + verbose=True, + detections_source="request", + ) + assert resp.details.preprocessing.detections_source == "request" + + +def test_verbose_details_detections_source_defaults_to_detector(): + out = SimpleNamespace( + is_positive=False, trigger_frame_index=None, details=_details([]) + ) + resp = to_response(out, name="m", version="1", calibrated=True, verbose=True) + assert resp.details.preprocessing.detections_source == "detector" + + def test_request_detections_default_to_none(): assert PredictRequest(frames=["a.jpg"]).detections is None From bf4f72acda2bf0c7d8843223064b2fecab1f0d1e Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:51:15 +0200 Subject: [PATCH 08/12] docs(api): document the detections field on /predict --- api/README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/api/README.md b/api/README.md index ee6e972..4cefa2d 100644 --- a/api/README.md +++ b/api/README.md @@ -9,10 +9,15 @@ Import as `temporal_model.api`. Depends on `temporal-model-core`. - `GET /health` — readiness + loaded model name/version. - `POST /predict` — body `{ "frames": ["", ...], "bucket": "", - "roi_xyxyn": [x_min, y_min, x_max, y_max] }` + "roi_xyxyn": [x_min, y_min, x_max, y_max], + "detections": [[{"xyxyn": [...], "confidence": 0.6}], []] }` (ordered S3 keys; `bucket` optional, falls back to `S3_BUCKET`; `roi_xyxyn` optional normalized region of interest — tubes with no real - detection intersecting it are dropped before scoring); + detection intersecting it are dropped before scoring; + `detections` optional caller-supplied boxes, one list per frame + index-aligned with `frames`, `[]` = that frame's detector saw nothing — + skips the bundled YOLO and its cache entirely, tubes are built from the + supplied boxes); returns `{ is_smoke, probability, model }` (`probability` = max kept-tube calibrated probability, `null` if uncalibrated). 
`POST /predict?verbose=true` adds a `details` block (decision, preprocessing, From 423572c8bbb88335373e1c3f2a95378b2eccbb84 Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:56:12 +0200 Subject: [PATCH 09/12] docs: drop the executed implementation plan --- .../2026-06-11-api-supplied-detections.md | 747 ------------------ 1 file changed, 747 deletions(-) delete mode 100644 docs/plans/2026-06-11-api-supplied-detections.md diff --git a/docs/plans/2026-06-11-api-supplied-detections.md b/docs/plans/2026-06-11-api-supplied-detections.md deleted file mode 100644 index 6759c3c..0000000 --- a/docs/plans/2026-06-11-api-supplied-detections.md +++ /dev/null @@ -1,747 +0,0 @@ -# Caller-Supplied Detections on `/predict` Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Let `/predict` callers supply per-frame detection boxes (from the RPi edge detector) so the API skips its bundled YOLO pass while tube building, ROI filtering, cropping, and classification run unchanged. - -**Architecture:** A new optional `detections` field on `PredictRequest` (index-aligned list of per-frame box lists, `xyxyn` + `confidence`), validated at the HTTP boundary. `ModelRunner._predict_sync` converts supplied boxes to internal `FrameDetections` and feeds the existing `model.predict(frame_detections=...)` injection seam, skipping `detect()` and the detection cache entirely. Core is untouched. Spec: `docs/specs/2026-06-11-api-supplied-detections-design.md`. - -**Tech Stack:** FastAPI + Pydantic v2 (`api/` package), pytest, `uv` (run tests from `api/`: `uv run pytest tests/ -v`). - -**Branch:** work on `arthur/feat-api-thread-bboxes`. Commit messages: conventional commits, NO co-author trailers. 
- ---- - -### Task 1: Request schema — `SuppliedDetection` + `detections` field - -**Files:** -- Modify: `api/src/temporal_model/api/schemas.py` (imports at line 11, `PredictRequest` at lines 22–69) -- Test: `api/tests/test_schemas.py` - -- [ ] **Step 1: Write the failing tests** - -Append to `api/tests/test_schemas.py` (existing imports at top already provide `pytest`, `ValidationError`, `PredictRequest`): - -```python -def test_request_detections_default_to_none(): - assert PredictRequest(frames=["a.jpg"]).detections is None - - -def test_request_accepts_per_frame_detections(): - req = PredictRequest( - frames=["a.jpg", "b.jpg"], - detections=[ - [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.7}], - [], - ], - ) - assert req.detections[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) - assert req.detections[0][0].confidence == 0.7 - assert req.detections[1] == [] - - -@pytest.mark.parametrize("entries", [[], [[]], [[], [], []]]) -def test_request_rejects_detections_length_mismatch(entries): - # frames has 2 keys; 0, 1 and 3 detection entries must all fail. - with pytest.raises(ValidationError, match="one entry per frame"): - PredictRequest(frames=["a.jpg", "b.jpg"], detections=entries) - - -def test_request_rejects_null_frame_entry(): - # "no detections" must be an explicit [], never null. 
- with pytest.raises(ValidationError): - PredictRequest(frames=["a.jpg"], detections=[None]) - - -@pytest.mark.parametrize( - "box", - [ - {"xyxyn": [-0.1, 0.2, 0.3, 0.4], "confidence": 0.5}, # coord < 0 - {"xyxyn": [0.1, 0.2, 0.3, 1.4], "confidence": 0.5}, # coord > 1 - {"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.5}, # x_min >= x_max - {"xyxyn": [0.1, 0.4, 0.3, 0.4], "confidence": 0.5}, # y_min >= y_max - {"xyxyn": [0.1, 0.2, 0.3], "confidence": 0.5}, # too short - {"xyxyn": [0.1, 0.2, 0.3, 0.4, 0.5], "confidence": 0.5}, # too long - {"xyxyn": ["a", 0.2, 0.3, 0.4], "confidence": 0.5}, # non-numeric - {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 1.5}, # confidence > 1 - {"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": -0.1}, # confidence < 0 - {"xyxyn": [0.1, 0.2, 0.3, 0.4]}, # missing confidence - {"confidence": 0.5}, # missing xyxyn - "not-an-object", # wrong type entirely - ], -) -def test_request_rejects_malformed_detection(box): - with pytest.raises(ValidationError): - PredictRequest(frames=["a.jpg"], detections=[[box]]) -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd api && uv run pytest tests/test_schemas.py -v -k detection` -Expected: FAIL — `test_request_detections_default_to_none` errors with `AttributeError: 'PredictRequest' object has no attribute 'detections'`; the rejection tests fail because no `ValidationError` is raised (unknown fields are ignored). - -- [ ] **Step 3: Implement the schema** - -In `api/src/temporal_model/api/schemas.py`: - -3a. Extend the pydantic import (line 11): - -```python -from pydantic import ( - BaseModel, - ConfigDict, - Field, - field_validator, - model_validator, -) -``` - -3b. Add `SuppliedDetection` immediately above `class PredictRequest` (after the `_BUCKET_RE` block): - -```python -class SuppliedDetection(BaseModel): - """One caller-supplied detection box (normalized xyxyn corners). - - Geometry rules match ``roi_xyxyn``. 
Checked inline rather than via the - core ``validate_roi`` helper so the error message names the detection - field, not "roi". - """ - - xyxyn: tuple[float, float, float, float] - confidence: float = Field(ge=0.0, le=1.0) - - @field_validator("xyxyn") - @classmethod - def _validate_xyxyn( - cls, v: tuple[float, float, float, float] - ) -> tuple[float, float, float, float]: - x_min, y_min, x_max, y_max = v - if not all(0.0 <= c <= 1.0 for c in v): - raise ValueError("xyxyn coordinates must be in [0, 1]") - if x_min >= x_max or y_min >= y_max: - raise ValueError("xyxyn requires x_min < x_max and y_min < y_max") - return v -``` - -3c. Add the field to `PredictRequest` directly under `roi_xyxyn` (line 33): - -```python - # Optional caller-supplied detections, one list per frame, index-aligned - # with `frames` ([] = that frame's detector saw nothing — never null). - # When set, the bundled YOLO and its cache are bypassed entirely and tubes - # are built from these boxes (see - # docs/specs/2026-06-11-api-supplied-detections-design.md). - detections: list[list[SuppliedDetection]] | None = None -``` - -3d. Add the cross-field length check after `_validate_roi` (line 69): - -```python - @model_validator(mode="after") - def _detections_match_frames(self) -> "PredictRequest": - if self.detections is not None and len(self.detections) != len(self.frames): - raise ValueError( - "detections must have exactly one entry per frame " - f"(got {len(self.detections)} entries for {len(self.frames)} frames)" - ) - return self -``` - -- [ ] **Step 4: Run tests to verify they pass** - -Run: `cd api && uv run pytest tests/test_schemas.py -v` -Expected: all PASS (new tests and pre-existing ones). 
- -- [ ] **Step 5: Commit** - -```bash -git add api/src/temporal_model/api/schemas.py api/tests/test_schemas.py -git commit -m "feat(api): accept per-frame supplied detections in PredictRequest" -``` - ---- - -### Task 2: Runner bypass — skip detector and cache when detections are supplied - -**Files:** -- Modify: `api/src/temporal_model/api/model_runner.py` (imports ~line 19, `predict` at lines 120–139, `_predict_sync` at lines 141–177) -- Test: `api/tests/test_model_runner.py` (`_OrchestrationModel` at lines 117–143) - -- [ ] **Step 1: Extend the orchestration fake to record full detections** - -In `api/tests/test_model_runner.py`, update `_OrchestrationModel` so `detect()` returns a real box (clean binary-fraction floats so dataclass equality is exact) and `predict()` records the full `frame_detections` dict: - -```python -class _OrchestrationModel: - """Fake core model recording how detection is invoked across calls.""" - - def __init__(self): - self.detect_calls: list[list[str]] = [] - self.predict_calls: list[set[str]] = [] - self.roi_calls: list[tuple | None] = [] - self.frame_detections_calls: list[dict] = [] - - def load_sequence(self, paths): - return [ - Frame(frame_id=Path(p).stem, image_path=Path(p), timestamp=None) - for p in paths - ] - - def detect(self, frames): - self.detect_calls.append([f.frame_id for f in frames]) - return [ - FrameDetections( - frame_idx=i, - frame_id=f.frame_id, - timestamp=None, - detections=[ - Detection( - class_id=0, cx=0.5, cy=0.5, w=0.5, h=0.5, confidence=0.75 - ) - ], - ) - for i, f in enumerate(frames) - ] - - def predict(self, frames, *, frame_detections=None, roi=None, timer=None): - self.predict_calls.append(set(frame_detections or {})) - self.roi_calls.append(roi) - self.frame_detections_calls.append(frame_detections or {}) - return SimpleNamespace(frame_ids=[f.frame_id for f in frames]) -``` - -`Detection` needs importing at the top of the file alongside the existing `Frame`/`FrameDetections` import (check the 
import block at the top; it already imports from `temporal_model.core.types`): - -```python -from temporal_model.core.types import Detection, Frame, FrameDetections -``` - -Existing tests only inspect `detect_calls` ids and `predict_calls` key sets, so the richer `detect()` return changes nothing for them. - -- [ ] **Step 2: Write the failing tests** - -Append to `api/tests/test_model_runner.py`. Add `pytest` and `SuppliedDetection` imports at the top if missing: - -```python -import pytest - -from temporal_model.api.schemas import SuppliedDetection -``` - -Tests: - -```python -def _supplied_box(): - return SuppliedDetection(xyxyn=(0.1, 0.2, 0.5, 0.8), confidence=0.7) - - -def test_predict_with_supplied_detections_skips_detect(): - model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - asyncio.run( - runner.predict( - ["c/x_00.jpg", "c/x_01.jpg"], detections=[[_supplied_box()], []] - ) - ) - - assert model.detect_calls == [] - assert model.predict_calls[-1] == {"x_00", "x_01"} - - -def test_predict_supplied_detections_converted_to_xywhn(): - model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) - - fd = model.frame_detections_calls[-1]["x_00"] - assert fd.frame_idx == 0 - [det] = fd.detections - assert (det.cx, det.cy, det.w, det.h) == pytest.approx((0.3, 0.5, 0.4, 0.6)) - assert det.confidence == 0.7 - assert det.class_id == 0 - - -def test_predict_supplied_empty_frame_has_no_detections(): - model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[]])) - - assert model.frame_detections_calls[-1]["x_00"].detections == [] - - -def test_predict_supplied_detections_do_not_enter_cache(): - model = _OrchestrationModel() - runner = ModelRunner( - model, name="m", version="1", calibrated=True, 
detection_cache_size=4096 - ) - asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) - asyncio.run(runner.predict(["c/x_00.jpg"])) - - # The supplied run wrote nothing: the plain run must re-detect the frame. - assert model.detect_calls == [["x_00"]] - - -def test_predict_supplied_detections_ignore_cached_entries(): - model = _OrchestrationModel() - runner = ModelRunner( - model, name="m", version="1", calibrated=True, detection_cache_size=4096 - ) - asyncio.run(runner.predict(["c/x_00.jpg"])) # warms cache (confidence 0.75) - asyncio.run(runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]])) - - # The supplied run used the supplied box, not the cached detector output. - [det] = model.frame_detections_calls[-1]["x_00"].detections - assert det.confidence == 0.7 - - -def test_predict_supplied_detections_profile_counters(): - model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - profile: dict = {} - asyncio.run( - runner.predict( - ["c/x_00.jpg"], detections=[[_supplied_box()]], profile=profile - ) - ) - - assert profile == {"n_frames": 1, "cache_hits": 0, "cache_misses": 0} - - -def test_predict_supplied_detections_threads_roi(): - model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - asyncio.run( - runner.predict( - ["c/x_00.jpg"], detections=[[_supplied_box()]], roi=(0.1, 0.2, 0.3, 0.4) - ) - ) - - assert model.roi_calls[-1] == (0.1, 0.2, 0.3, 0.4) - - -def test_predict_supplied_matches_detector_path(): - # Supplying the exact box the detector would produce (xywhn 0.5/0.5/0.5/0.5 - # == xyxyn 0.25..0.75) hands the model identical FrameDetections. 
- model = _OrchestrationModel() - runner = ModelRunner(model, name="m", version="1", calibrated=True) - paths = ["c/x_00.jpg", "c/x_01.jpg"] - - asyncio.run(runner.predict(paths)) - detector_fds = model.frame_detections_calls[-1] - - equivalent = SuppliedDetection(xyxyn=(0.25, 0.25, 0.75, 0.75), confidence=0.75) - asyncio.run(runner.predict(paths, detections=[[equivalent], [equivalent]])) - supplied_fds = model.frame_detections_calls[-1] - - assert supplied_fds == detector_fds -``` - -- [ ] **Step 3: Run tests to verify they fail** - -Run: `cd api && uv run pytest tests/test_model_runner.py -v -k supplied` -Expected: FAIL — `TypeError: ModelRunner.predict() got an unexpected keyword argument 'detections'`. - -- [ ] **Step 4: Implement the runner bypass** - -In `api/src/temporal_model/api/model_runner.py`: - -4a. Add imports at the top (after the existing `from temporal_model.core.stage_timer import ...` line): - -```python -from temporal_model.core.types import Detection, FrameDetections - -from .detection_cache import DetectionCache -from .schemas import SuppliedDetection -``` - -(`.detection_cache` is already imported; only add the other two lines, keeping import order ruff-clean.) - -4b. Add a module-level helper above `class ModelRunner`: - -```python -def _supplied_frame_detections( - frames: list[Any], detections: list[list[SuppliedDetection]] -) -> dict[str, FrameDetections]: - """Convert caller-supplied xyxyn boxes to per-frame ``FrameDetections``. - - ``detections`` is index-aligned with ``frames`` (lengths validated at the - HTTP boundary; ``strict=True`` is a safety net). Boxes arrive as - normalized corners and become center-based xywhn ``Detection``s; supplied - boxes are smoke by definition (``class_id=0``). 
- """ - resolved: dict[str, FrameDetections] = {} - for idx, (frame, boxes) in enumerate(zip(frames, detections, strict=True)): - resolved[frame.frame_id] = FrameDetections( - frame_idx=idx, - frame_id=frame.frame_id, - timestamp=frame.timestamp, - detections=[ - Detection( - class_id=0, - cx=(b.xyxyn[0] + b.xyxyn[2]) / 2.0, - cy=(b.xyxyn[1] + b.xyxyn[3]) / 2.0, - w=b.xyxyn[2] - b.xyxyn[0], - h=b.xyxyn[3] - b.xyxyn[1], - confidence=b.confidence, - ) - for b in boxes - ], - ) - return resolved -``` - -4c. Thread the parameter through `predict` (signature + docstring + threadpool call): - -```python - async def predict( - self, - frame_paths: list[Path], - *, - roi: tuple[float, float, float, float] | None = None, - detections: list[list[SuppliedDetection]] | None = None, - timer: StageTimer | None = None, - profile: dict[str, Any] | None = None, - ) -> Any: - """Resolve detections (cache + detect misses) then run the model. - - The whole orchestration runs in a worker thread under the lock, so the - cache is accessed by one prediction at a time. When ``timer``/``profile`` - are supplied, the ``detector`` stage is timed and cache counts recorded. - ``roi`` is passed through to the core model untouched — the cache stays - full-frame (see the invariant in the ROI spec). When ``detections`` is - supplied (index-aligned per-frame boxes from the caller's own - detector), the bundled detector and its cache are bypassed entirely: - no read, no write, no ``detector`` stage. - """ - async with self._lock: - return await run_in_threadpool( - self._predict_sync, frame_paths, roi, detections, timer, profile - ) -``` - -4d. 
Branch in `_predict_sync` right after `load_sequence`: - -```python - def _predict_sync( - self, - frame_paths: list[Path], - roi: tuple[float, float, float, float] | None = None, - detections: list[list[SuppliedDetection]] | None = None, - timer: StageTimer | None = None, - profile: dict[str, Any] | None = None, - ) -> Any: - started = time.perf_counter() - frames = self._model.load_sequence(frame_paths) - if detections is not None: - out = self._model.predict( - frames, - frame_detections=_supplied_frame_detections(frames, detections), - roi=roi, - timer=timer, - ) - if profile is not None: - profile["n_frames"] = len(frames) - profile["cache_hits"] = 0 - profile["cache_misses"] = 0 - logger.info( - "predict: supplied detections, seq_len=%d, %.0fms", - len(frames), - (time.perf_counter() - started) * 1000.0, - ) - return out - resolved: dict[str, Any] = {} - ... # existing detector-path body continues unchanged from here -``` - -(The `...` is the existing code from `resolved: dict[str, Any] = {}` onward — do not modify it.) - -- [ ] **Step 5: Run tests to verify they pass** - -Run: `cd api && uv run pytest tests/test_model_runner.py -v` -Expected: all PASS (new and pre-existing). - -- [ ] **Step 6: Commit** - -```bash -git add api/src/temporal_model/api/model_runner.py api/tests/test_model_runner.py -git commit -m "feat(api): bypass detector and cache when detections are supplied" -``` - ---- - -### Task 3: App threading + `detections_source` provenance in verbose details - -**Files:** -- Modify: `api/src/temporal_model/api/app.py` (predict handler, lines 146–169) -- Modify: `api/src/temporal_model/api/schemas.py` (`Preprocessing` ~line 100, `_to_details` ~line 142, `to_response` ~line 171) -- Test: `api/tests/test_app.py` (`FakeRunner` at lines 58–79), `api/tests/test_schemas.py` - -- [ ] **Step 1: Write the failing tests** - -1a. 
Append to `api/tests/test_schemas.py`: - -```python -def test_verbose_details_detections_source_request(): - out = SimpleNamespace( - is_positive=False, trigger_frame_index=None, details=_details([]) - ) - resp = to_response( - out, - name="m", - version="1", - calibrated=True, - verbose=True, - detections_source="request", - ) - assert resp.details.preprocessing.detections_source == "request" - - -def test_verbose_details_detections_source_defaults_to_detector(): - out = SimpleNamespace( - is_positive=False, trigger_frame_index=None, details=_details([]) - ) - resp = to_response(out, name="m", version="1", calibrated=True, verbose=True) - assert resp.details.preprocessing.detections_source == "detector" -``` - -1b. In `api/tests/test_app.py`, update `FakeRunner` to accept and record the new kwarg: - -```python - def __init__(self, output=None, error=None): - self._output = output - self._error = error - self.roi = None - self.detections = None - - async def predict( - self, paths, *, roi=None, detections=None, timer=None, profile=None - ): - self.roi = roi - self.detections = detections - if self._error: - raise self._error - if timer is not None: - with timer.stage("detector"): - pass - if profile is not None: - profile.update(n_frames=len(paths), cache_hits=0, cache_misses=len(paths)) - return self._output -``` - -1c. 
Append endpoint tests to `api/tests/test_app.py` (`KEYS` has exactly 2 frames): - -```python -def test_predict_passes_detections_to_runner(client): - r = client.post( - "/predict", - json={ - "frames": KEYS, - "detections": [ - [{"xyxyn": [0.1, 0.2, 0.3, 0.4], "confidence": 0.6}], - [], - ], - }, - ) - assert r.status_code == 200 - sent = client.app.state.runner.detections - assert sent[0][0].xyxyn == (0.1, 0.2, 0.3, 0.4) - assert sent[0][0].confidence == 0.6 - assert sent[1] == [] - - -def test_predict_without_detections_passes_none(client): - r = client.post("/predict", json={"frames": KEYS}) - assert r.status_code == 200 - assert client.app.state.runner.detections is None - - -def test_predict_detections_length_mismatch_is_400(client): - r = client.post("/predict", json={"frames": KEYS, "detections": [[]]}) - assert r.status_code == 400 - body = r.json() - assert body["code"] == "invalid_request" - assert "one entry per frame" in body["detail"] - - -def test_predict_malformed_detection_is_400(client): - r = client.post( - "/predict", - json={ - "frames": KEYS, - "detections": [ - [{"xyxyn": [0.3, 0.2, 0.1, 0.4], "confidence": 0.6}], - [], - ], - }, - ) - assert r.status_code == 400 - assert r.json()["code"] == "invalid_request" - - -def test_predict_detections_compose_with_roi(client): - r = client.post( - "/predict", - json={ - "frames": KEYS, - "detections": [[], []], - "roi_xyxyn": [0.0, 0.0, 1.0, 1.0], - }, - ) - assert r.status_code == 200 - assert client.app.state.runner.roi == (0.0, 0.0, 1.0, 1.0) - assert client.app.state.runner.detections == [[], []] - - -def test_predict_verbose_detections_source_request(client): - r = client.post( - "/predict?verbose=true", json={"frames": KEYS, "detections": [[], []]} - ) - assert r.status_code == 200 - assert r.json()["details"]["preprocessing"]["detections_source"] == "request" - - -def test_predict_verbose_detections_source_detector(client): - r = client.post("/predict?verbose=true", json={"frames": KEYS}) - 
assert r.status_code == 200 - assert r.json()["details"]["preprocessing"]["detections_source"] == "detector" -``` - -- [ ] **Step 2: Run tests to verify they fail** - -Run: `cd api && uv run pytest tests/test_app.py tests/test_schemas.py -v -k "detections_source or detections"` -Expected: FAIL — `to_response() got an unexpected keyword argument 'detections_source'`; endpoint tests fail with `detections_source` missing from the verbose payload and `FakeRunner.detections` never set by the app. - -- [ ] **Step 3: Implement** - -3a. In `api/src/temporal_model/api/schemas.py`, add the field to `Preprocessing`: - -```python -class Preprocessing(BaseModel): - num_frames_input: int - num_truncated: int - padded_frame_indices: list[int] - num_tube_candidates: int - num_tubes_outside_roi: int - # Provenance: "request" when the caller supplied the detections (bundled - # detector bypassed), "detector" when the bundled YOLO produced them. - detections_source: Literal["request", "detector"] -``` - -3b. Thread it through `_to_details` (add parameter, pass into `Preprocessing(...)`): - -```python -def _to_details( - details: dict[str, Any], - *, - threshold_overridden: bool, - packaged_threshold: float | None, - detections_source: Literal["request", "detector"], - profiling: dict[str, Any] | None = None, -) -> Details: -``` - -and inside the `Preprocessing(` call add `detections_source=detections_source,`. - -3c. Thread it through `to_response` (default keeps every existing caller working): - -```python -def to_response( - out: Any, - *, - name: str, - version: str | None, - calibrated: bool, - verbose: bool, - threshold_overridden: bool = False, - packaged_threshold: float | None = None, - detections_source: Literal["request", "detector"] = "detector", - profiling: dict[str, Any] | None = None, -) -> PredictResponse: -``` - -and in the `verbose` branch pass `detections_source=detections_source,` to `_to_details`. - -3d. 
In `api/src/temporal_model/api/app.py`, update the two call sites in `predict`: - -```python - out = await runner.predict( - paths, - roi=body.roi_xyxyn, - detections=body.detections, - timer=timer, - profile=profile, - ) -``` - -```python - return to_response( - out, - name=runner.name, - version=runner.version, - calibrated=runner.calibrated, - verbose=verbose, - threshold_overridden=runner.threshold_overridden, - packaged_threshold=runner.packaged_threshold, - detections_source=( - "request" if body.detections is not None else "detector" - ), - profiling=profiling, - ) -``` - -- [ ] **Step 4: Run tests to verify they pass** - -Run: `cd api && uv run pytest tests/test_app.py tests/test_schemas.py -v` -Expected: all PASS. - -- [ ] **Step 5: Commit** - -```bash -git add api/src/temporal_model/api/app.py api/src/temporal_model/api/schemas.py api/tests/test_app.py api/tests/test_schemas.py -git commit -m "feat(api): thread supplied detections through /predict with provenance" -``` - ---- - -### Task 4: README, lint, full suite - -**Files:** -- Modify: `api/README.md` (the `POST /predict` bullet, lines 11–16) - -- [ ] **Step 1: Update the endpoint documentation** - -Replace the `POST /predict` bullet in `api/README.md` with: - -```markdown -- `POST /predict` — body `{ "frames": ["", ...], "bucket": "", - "roi_xyxyn": [x_min, y_min, x_max, y_max], - "detections": [[{"xyxyn": [...], "confidence": 0.6}], []] }` - (ordered S3 keys; `bucket` optional, falls back to `S3_BUCKET`; - `roi_xyxyn` optional normalized region of interest — tubes with no real - detection intersecting it are dropped before scoring; - `detections` optional caller-supplied boxes, one list per frame - index-aligned with `frames`, `[]` = that frame's detector saw nothing — - skips the bundled YOLO and its cache entirely, tubes are built from the - supplied boxes); - returns `{ is_smoke, probability, model }` (`probability` = max kept-tube - calibrated probability, `null` if uncalibrated). 
- `POST /predict?verbose=true` adds a `details` block (decision, preprocessing, - per-tube tracks). -``` - -- [ ] **Step 2: Lint and run the full API suite** - -Run: `make -C api lint && make -C api test` -Expected: lint clean, all tests PASS. - -- [ ] **Step 3: Run the core suite (regression — core is meant to be untouched)** - -Run: `git status --short core/` (expect no modifications) and `make -C core test` -Expected: no core diffs; all core tests PASS. - -- [ ] **Step 4: Commit** - -```bash -git add api/README.md -git commit -m "docs(api): document the detections field on /predict" -``` From f05802d9f604d6c2f7e22948cb794b274dc2ec05 Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 13:56:12 +0200 Subject: [PATCH 10/12] style: ruff format the runner tests --- api/tests/test_model_runner.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/api/tests/test_model_runner.py b/api/tests/test_model_runner.py index 43d4647..e054406 100644 --- a/api/tests/test_model_runner.py +++ b/api/tests/test_model_runner.py @@ -139,9 +139,7 @@ def detect(self, frames): frame_id=f.frame_id, timestamp=None, detections=[ - Detection( - class_id=0, cx=0.5, cy=0.5, w=0.5, h=0.5, confidence=0.75 - ) + Detection(class_id=0, cx=0.5, cy=0.5, w=0.5, h=0.5, confidence=0.75) ], ) for i, f in enumerate(frames) @@ -210,9 +208,7 @@ def test_predict_with_supplied_detections_skips_detect(): model = _OrchestrationModel() runner = ModelRunner(model, name="m", version="1", calibrated=True) asyncio.run( - runner.predict( - ["c/x_00.jpg", "c/x_01.jpg"], detections=[[_supplied_box()], []] - ) + runner.predict(["c/x_00.jpg", "c/x_01.jpg"], detections=[[_supplied_box()], []]) ) assert model.detect_calls == [] @@ -270,9 +266,7 @@ def test_predict_supplied_detections_profile_counters(): runner = ModelRunner(model, name="m", version="1", calibrated=True) profile: dict = {} asyncio.run( - runner.predict( - ["c/x_00.jpg"], detections=[[_supplied_box()]], profile=profile - 
) + runner.predict(["c/x_00.jpg"], detections=[[_supplied_box()]], profile=profile) ) assert profile == {"n_frames": 1, "cache_hits": 0, "cache_misses": 0} From 17a12ba3dea66d025e4c9c56cef7187f54f668ef Mon Sep 17 00:00:00 2001 From: Chouffe Date: Thu, 11 Jun 2026 14:08:21 +0200 Subject: [PATCH 11/12] docs: name the /predict caller alert-api, not platform --- docs/specs/2026-06-11-api-supplied-detections-design.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/specs/2026-06-11-api-supplied-detections-design.md b/docs/specs/2026-06-11-api-supplied-detections-design.md index 5acf7e9..758362e 100644 --- a/docs/specs/2026-06-11-api-supplied-detections-design.md +++ b/docs/specs/2026-06-11-api-supplied-detections-design.md @@ -7,7 +7,7 @@ The Pyronear edge devices (RPis running pyro-engine) already run a YOLO detector on every frame and ship the resulting bboxes with their alerts. When -the platform calls `/predict` to get a temporal verdict on an alert sequence, +the alert-api calls `/predict` to get a temporal verdict on an alert sequence, the API re-runs its own bundled YOLO on the same frames — a redundant GPU pass that adds latency and compute. This feature lets the caller supply the per-frame bboxes it already holds; the API skips the detector stage and feeds @@ -31,7 +31,7 @@ decision — runs unchanged. explicit empty list. No partial coverage, no omitted-key rules a dict-keyed shape would need. 3. **`xyxyn` + `confidence` inner objects.** Boxes arrive as normalized - corners — the convention pyro-engine produces and the platform stores, and + corners — the convention pyro-engine produces and the alert-api stores, and the same ultralytics vocabulary as the existing `roi_xyxyn` field. The API converts to the internal center-based `xywhn` (`Detection` dataclass) at the boundary. 
`class_id` is not exposed; supplied boxes are smoke @@ -189,7 +189,7 @@ differ in tightness, confidence distribution, and threshold; tubes built from them may shift crop geometry and calibration. The classifier scores image crops, not box metadata, so the mechanism is expected to work — but calibration on real RPi boxes is unvalidated. Validation happens at -platform-integration time, not in this work. +alert-api-integration time, not in this work. ## Testing From 922418451691c13a64a54e34ef1030ddaeccf8b8 Mon Sep 17 00:00:00 2001 From: Chouffe Date: Fri, 12 Jun 2026 08:52:23 +0200 Subject: [PATCH 12/12] fix(api): forward compute_trigger on the supplied-detections path The merge of #47 with the compute_trigger flag (#51) left the supplied-detections fast path dropping the flag: ?compute_trigger=true with caller-supplied boxes would silently skip the first-crossing search. Thread it through and pin the composition with a test. --- api/src/temporal_model/api/model_runner.py | 1 + api/tests/test_model_runner.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/api/src/temporal_model/api/model_runner.py b/api/src/temporal_model/api/model_runner.py index 5f46a9d..88e1fcc 100644 --- a/api/src/temporal_model/api/model_runner.py +++ b/api/src/temporal_model/api/model_runner.py @@ -199,6 +199,7 @@ def _predict_sync( frame_detections=_supplied_frame_detections(frames, detections), roi=roi, timer=timer, + compute_trigger=compute_trigger, ) if profile is not None: profile["n_frames"] = len(frames) diff --git a/api/tests/test_model_runner.py b/api/tests/test_model_runner.py index d39c2b3..f3d9e7e 100644 --- a/api/tests/test_model_runner.py +++ b/api/tests/test_model_runner.py @@ -308,6 +308,17 @@ def test_predict_supplied_detections_threads_roi(): assert model.roi_calls[-1] == (0.1, 0.2, 0.3, 0.4) +def test_predict_supplied_detections_threads_compute_trigger(): + model = _OrchestrationModel() + runner = ModelRunner(model, name="m", version="1", calibrated=True) + 
asyncio.run( + runner.predict( + ["c/x_00.jpg"], detections=[[_supplied_box()]], compute_trigger=True + ) + ) + assert model.trigger_calls[-1] is True + + def test_predict_supplied_matches_detector_path(): # Supplying the exact box the detector would produce (xywhn 0.5/0.5/0.5/0.5 # == xyxyn 0.25..0.75) hands the model identical FrameDetections.