From cc9163e50ea890452219cac5c8ee4bd07e332e7f Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 08:36:30 +0200 Subject: [PATCH 01/10] refactor(predictor): store frame payload opaquely in state --- pyro-predictor/pyro_predictor/predictor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyro-predictor/pyro_predictor/predictor.py b/pyro-predictor/pyro_predictor/predictor.py index 61eac3b5..b4c4cc23 100644 --- a/pyro-predictor/pyro_predictor/predictor.py +++ b/pyro-predictor/pyro_predictor/predictor.py @@ -82,11 +82,13 @@ def _new_state(self) -> Dict[str, Any]: def _update_states( self, - frame: Image.Image, + frame: Any, preds: np.ndarray, cam_key: str, encoded_bytes: Optional[bytes] = None, ) -> float: + # `frame` is stored opaquely in the window and replayed at staging time; callers may + # pass a full PIL image or any lighter payload (e.g. Engine passes a compact context crop). nb = self.nb_consecutive_frames prev_ongoing = self._states[cam_key]["ongoing"] # Hysteresis: once alerting, relax the threshold so the alert keeps emitting frames. From cde0ab39ba2454ffab0f7286f9f0517685c8a687 Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 08:47:16 +0200 Subject: [PATCH 02/10] feat(engine): compact context crops and stable per-event detection crops Store a JPEG-encoded region around the raw predictions (3x the union size, min 1024px) with its offset instead of the full decoded 4K frame (~24MB each) in the sliding window and alert queue. At staging, cluster the tracked bboxes and freeze one square crop box per cluster until the event ends, so the 224x224 crops stay centered on the same spot instead of following bbox jitter. Crops are cut from the stored context crop at send time. --- pyroengine/engine.py | 239 +++++++++++++++++++++++++++++++++++-------- tests/test_engine.py | 109 +++++++++++++++++--- 2 files changed, 291 insertions(+), 57 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 7069a3e5..59f06f38 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -9,9 +9,10 @@ import signal import time from collections import deque +from dataclasses import dataclass from datetime import datetime from pathlib import Path -from typing import Any, Dict, Never, Optional, Tuple +from typing import Any, Dict, List, Never, Optional, Tuple import numpy as np from PIL import Image @@ -22,11 +23,30 @@ from requests.exceptions import RequestException from requests.models import Response -__all__ = ["Engine"] +__all__ = ["ContextCrop", "Engine"] logging.basicConfig(format="%(asctime)s | %(levelname)s: %(message)s", level=logging.INFO, force=True) logger = logging.getLogger(__name__) +# Context crop kept in RAM instead of the full-resolution frame: the region around the raw +# predictions is expanded by CONTEXT_PADDING (3x the union size), with a floor of CONTEXT_MIN_SIDE px. +CONTEXT_PADDING = 2.0 +CONTEXT_MIN_SIDE = 1024 +CONTEXT_JPEG_QUALITY = 95 +# Padding applied around a bbox cluster for the final 224x224 detection crops. +CROP_PADDING = 0.20 + + +@dataclass(frozen=True) +class ContextCrop: + """JPEG-encoded region of a full-resolution frame, with its position and the full frame size.""" + + jpeg: bytes + left: int + top: int + full_w: int + full_h: int + def handler(_signum: int, _frame: object) -> Never: raise TimeoutError("Heartbeat check timed out") @@ -141,6 +161,7 @@ def __init__( state["last_image_sent"] = None state["last_bbox_mask_fetch"] = None state["last_pose_image_sent"] = init_now + state["event_crop_boxes"] = [] # Occlusion masks: cam_id -> dict of bboxes (keyed by mask id) self.occlusion_masks: Dict[str, Dict[Any, Any]] = {} @@ -156,6 +177,7 @@ def _new_state(self) -> Dict[str, Any]: state["last_image_sent"] = None state["last_bbox_mask_fetch"] = None state["last_pose_image_sent"] = datetime.now() + state["event_crop_boxes"] = [] return state def heartbeat(self, cam_id: str) -> Response: @@ -272,8 +294,13 @@ def predict( preds = np.reshape(preds, (-1, 5)) logger.info(f"pred for {cam_key} : {preds}") - # Store the original frame in state so _process_alerts can crop at full resolution. - conf = self._update_states(original_frame, preds, cam_key, encoded_bytes=encoded_bytes) + # Store only a compact JPEG region around the detections so _process_alerts can crop at + # full resolution without keeping the whole original frame in RAM. + context_crop = self._build_context_crop(original_frame, preds) + conf = self._update_states(context_crop, preds, cam_key, encoded_bytes=encoded_bytes) + if not self._states[cam_key]["ongoing"]: + # Event over: drop the frozen crop boxes so the next event re-centers. + self._states[cam_key]["event_crop_boxes"] = [] if self.save_captured_frames: self._local_backup(frame, cam_id, is_alert=False, encoded_bytes=encoded_bytes) @@ -285,19 +312,29 @@ def predict( # Alert (use ongoing so hysteresis-relaxed threshold keeps staging frames during a dip) if self._states[cam_key]["ongoing"] and len(self.api_client) > 0 and isinstance(cam_id, str): + state = self._states[cam_key] # Collect every bbox the predictor emitted across the window; treat these as # tracked locations and backfill missing per-frame bboxes from raw preds with conf=0. - tracked = [b[:4] for _, _, bbs, _, _, _ in self._states[cam_key]["last_predictions"] for b in bbs] + tracked = [b[:4] for _, _, bbs, _, _, _ in state["last_predictions"] for b in bbs] tracked_arr = np.array(tracked, dtype=np.float64) if tracked else np.empty((0, 4)) - for idx, (frame_, preds_, bboxes, ts, is_staged, jpeg_bytes) in enumerate( - self._states[cam_key]["last_predictions"] - ): + # Freeze one square crop box per cluster of tracked bboxes so every frame of the + # event is cropped at the same location, even when individual bboxes move. + full_size = next(((cc.full_w, cc.full_h) for cc, *_ in state["last_predictions"] if cc is not None), None) + if tracked and full_size is not None: + self._update_event_crop_boxes(cam_key, tracked, *full_size) + + for idx, (crop_, preds_, bboxes, ts, is_staged, jpeg_bytes) in enumerate(state["last_predictions"]): if not is_staged: bboxes = self._backfill_bboxes(bboxes, preds_, tracked_arr) - self._stage_alert(frame_, cam_id, ts, bboxes, jpeg_bytes) - self._states[cam_key]["last_predictions"][idx] = ( - frame_, + crop_boxes = ( + self._assign_crop_boxes(bboxes, cam_key, *full_size) + if bboxes and full_size is not None + else None + ) + self._stage_alert(crop_, cam_id, ts, bboxes, jpeg_bytes, crop_boxes) + state["last_predictions"][idx] = ( + crop_, preds_, bboxes, ts, @@ -307,12 +344,32 @@ def predict( return float(conf) + @staticmethod + def _fit_box( + box: Tuple[float, float, float, float], img_w: float, img_h: float + ) -> Tuple[float, float, float, float]: + """Shift a box back inside the image to preserve its size; clip only if larger than the image.""" + left, top, right, bottom = box + if left < 0: + right -= left + left = 0 + if top < 0: + bottom -= top + top = 0 + if right > img_w: + left -= right - img_w + right = img_w + if bottom > img_h: + top -= bottom - img_h + bottom = img_h + return max(left, 0.0), max(top, 0.0), right, bottom + @staticmethod def _compute_crop_box( bboxes: list, img_w: int, img_h: int, - padding: float = 0.20, + padding: float = CROP_PADDING, ) -> Tuple[int, int, int, int]: """Square crop covering all bboxes (normalized coords) with `padding` on the largest dim.""" arr = np.asarray(bboxes, dtype=float) @@ -327,33 +384,119 @@ def _compute_crop_box( cx = (x1 + x2) / 2.0 cy = (y1 + y2) / 2.0 half = side / 2.0 - left, top, right, bottom = cx - half, cy - half, cx + half, cy + half - - # Shift back inside the image to keep the crop square instead of clipping. - if left < 0: - right -= left - left = 0 - if top < 0: - bottom -= top - top = 0 - if right > img_w: - left -= right - img_w - right = img_w - if bottom > img_h: - top -= bottom - img_h - bottom = img_h - + left, top, right, bottom = Engine._fit_box((cx - half, cy - half, cx + half, cy + half), img_w, img_h) return round(left), round(top), round(right), round(bottom) - def _encode_detection_crops(self, frame: Image.Image, bboxes: list) -> Optional[list[bytes]]: - """Crop the original frame around each bbox and encode one 224x224 JPEG per bbox to upload.""" - if not bboxes: + def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional[ContextCrop]: + """Encode a generous region around the raw predictions instead of keeping the full frame. + + The region is the union of all raw preds expanded to 3x its size per axis (min 1024 px), + so the per-event crop boxes computed later almost always fall inside it. + """ + if preds.shape[0] == 0: return None img_w, img_h = frame.size - crops: list[bytes] = [] + arr = np.asarray(preds, dtype=float) + x1 = float(arr[:, 0].min()) * img_w + y1 = float(arr[:, 1].min()) * img_h + x2 = float(arr[:, 2].max()) * img_w + y2 = float(arr[:, 3].max()) * img_h + + target_w = min(max((x2 - x1) * (1.0 + CONTEXT_PADDING), CONTEXT_MIN_SIDE), img_w) + target_h = min(max((y2 - y1) * (1.0 + CONTEXT_PADDING), CONTEXT_MIN_SIDE), img_h) + cx = (x1 + x2) / 2.0 + cy = (y1 + y2) / 2.0 + box = self._fit_box( + (cx - target_w / 2.0, cy - target_h / 2.0, cx + target_w / 2.0, cy + target_h / 2.0), img_w, img_h + ) + left, top, right, bottom = (round(v) for v in box) + buf = io.BytesIO() + frame.crop((left, top, right, bottom)).save(buf, format="JPEG", quality=CONTEXT_JPEG_QUALITY) + return ContextCrop(jpeg=buf.getvalue(), left=left, top=top, full_w=img_w, full_h=img_h) + + @staticmethod + def _cluster_bboxes(bboxes: list) -> List[list]: + """Group bboxes (normalized coords) into clusters of transitively overlapping boxes.""" + clusters = [[list(b[:4]), [b]] for b in bboxes] + merged = True + while merged: + merged = False + for i in range(len(clusters)): + for j in range(i + 1, len(clusters)): + a, b = clusters[i][0], clusters[j][0] + if a[0] < b[2] and a[2] > b[0] and a[1] < b[3] and a[3] > b[1]: + clusters[i][0] = [min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3])] + clusters[i][1].extend(clusters[j][1]) + del clusters[j] + merged = True + break + if merged: + break + return [members for _, members in clusters] + + def _update_event_crop_boxes(self, cam_key: str, tracked_bboxes: list, full_w: int, full_h: int) -> None: + """Add a frozen square crop box for any cluster of tracked bboxes not yet covered. + + Existing boxes are never moved or resized, so all crops of one event stay centered + on the same spot regardless of bbox jitter. + """ + frozen = self._states[cam_key]["event_crop_boxes"] + uncovered = [ + bbox for bbox in tracked_bboxes if not any(self._center_in_box(bbox, box, full_w, full_h) for box in frozen) + ] + for cluster in self._cluster_bboxes(uncovered): + frozen.append(self._compute_crop_box(cluster, full_w, full_h)) + + @staticmethod + def _center_in_box(bbox: list, box: Tuple[int, int, int, int], img_w: int, img_h: int) -> bool: + cx = (bbox[0] + bbox[2]) / 2.0 * img_w + cy = (bbox[1] + bbox[3]) / 2.0 * img_h + return box[0] <= cx <= box[2] and box[1] <= cy <= box[3] + + def _assign_crop_boxes(self, bboxes: list, cam_key: str, full_w: int, full_h: int) -> list: + """Pick, for each bbox, the frozen event box with the largest overlap; add one if none overlaps.""" + frozen = self._states[cam_key]["event_crop_boxes"] + assigned = [] for bbox in bboxes: - box = self._compute_crop_box([bbox], img_w, img_h, padding=0.20) - crop = frame.crop(box) + bx1, by1 = bbox[0] * full_w, bbox[1] * full_h + bx2, by2 = bbox[2] * full_w, bbox[3] * full_h + best, best_area = None, 0.0 + for box in frozen: + inter_w = max(0.0, min(bx2, box[2]) - max(bx1, box[0])) + inter_h = max(0.0, min(by2, box[3]) - max(by1, box[1])) + if inter_w * inter_h > best_area: + best, best_area = box, inter_w * inter_h + if best is None: + best = self._compute_crop_box([bbox], full_w, full_h) + frozen.append(best) + assigned.append(best) + return assigned + + def _encode_detection_crops( + self, + context_crop: Optional[ContextCrop], + bboxes: list, + crop_boxes: Optional[list], + ) -> Optional[list[bytes]]: + """Cut one 224x224 JPEG per bbox out of the context crop, using the frozen event crop boxes.""" + if context_crop is None or not bboxes or not crop_boxes or len(crop_boxes) != len(bboxes): + return None + region = Image.open(io.BytesIO(context_crop.jpeg)) + region_w, region_h = region.size + crops: list[bytes] = [] + for box in crop_boxes: + local = self._fit_box( + ( + box[0] - context_crop.left, + box[1] - context_crop.top, + box[2] - context_crop.left, + box[3] - context_crop.top, + ), + region_w, + region_h, + ) + lx1, ly1, lx2, ly2 = (round(v) for v in local) + crop = region.crop((lx1, ly1, lx2, ly2)) crop_w, crop_h = crop.size downscaling = crop_w > 224 or crop_h > 224 if (crop_w, crop_h) != (224, 224): @@ -386,21 +529,23 @@ def _backfill_bboxes(bboxes: list, preds: np.ndarray, tracked: np.ndarray) -> li def _stage_alert( self, - frame: Image.Image, + context_crop: Optional[ContextCrop], cam_id: str, ts: int, bboxes: list, jpeg_bytes: Optional[bytes] = None, + crop_boxes: Optional[list] = None, ) -> None: # Store information in the queue self._alerts.append({ - "frame": frame, + "context_crop": context_crop, "cam_id": cam_id, "ts": ts, "media_id": None, "alert_id": None, "bboxes": bboxes, "jpeg_bytes": jpeg_bytes, + "crop_boxes": crop_boxes, }) def fill_empty_bboxes(self) -> None: @@ -422,7 +567,7 @@ def _process_alerts(self) -> None: # Save alert on device if self.save_detections_frames: self._local_backup( - frame_info["frame"], + None, cam_id, encoded_bytes=frame_info.get("jpeg_bytes"), ) @@ -436,12 +581,14 @@ def _process_alerts(self) -> None: continue jpeg_bytes = frame_info.get("jpeg_bytes") if jpeg_bytes is None: - # Fallback for cached alerts staged before this version - stream = io.BytesIO() - frame_info["frame"].save(stream, format="JPEG", quality=self.jpeg_quality) - jpeg_bytes = stream.getvalue() + # The full frame is no longer kept in RAM, so there is nothing to re-encode. + logger.warning(f"Camera '{cam_id}' - skipping alert without encoded frame") + self._alerts.popleft() + continue bboxes = [tuple(bboxe) for bboxe in bboxes] - crops = self._encode_detection_crops(frame_info["frame"], bboxes) + crops = self._encode_detection_crops( + frame_info.get("context_crop"), bboxes, frame_info.get("crop_boxes") + ) _, pose_id = self.cam_creds[cam_id] ip = cam_id.split("_")[0] response = self.api_client[ip].create_detection(jpeg_bytes, bboxes, pose_id, crops=crops) @@ -463,7 +610,7 @@ def _process_alerts(self) -> None: def _local_backup( self, - img: Image.Image, + img: Optional[Image.Image], cam_id: Optional[str], is_alert: bool = True, encoded_bytes: Optional[bytes] = None, @@ -471,12 +618,14 @@ def _local_backup( """Save image on device Args: - img (Image.Image): Image to save + img: Image to save; may be None when `encoded_bytes` is provided cam_id (str): camera id (ip address) is_alert (bool): is the frame an alert ? encoded_bytes: pre-encoded JPEG bytes — written verbatim when provided so the on-disk file is byte-identical to what was scored / uploaded. """ + if img is None and encoded_bytes is None: + return folder = "alerts" if is_alert else "save" backup_cache = self._cache.joinpath(f"backup/{folder}/") self._clean_local_backup(backup_cache) # Dump old cache @@ -485,7 +634,7 @@ def _local_backup( file = backup_cache.joinpath(f"{time.strftime('%Y%m%d-%H%M%S')}.jpg") if encoded_bytes is not None: file.write_bytes(encoded_bytes) - else: + elif img is not None: img.save(file) def _clean_local_backup(self, backup_cache: Path) -> None: diff --git a/tests/test_engine.py b/tests/test_engine.py index 9fbf26f0..00076f74 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -6,12 +6,13 @@ from pathlib import Path from unittest.mock import MagicMock, patch +import numpy as np import onnx import pytest from dotenv import load_dotenv from PIL import Image -from pyroengine.engine import Engine +from pyroengine.engine import ContextCrop, Engine def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): @@ -36,7 +37,8 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): assert 0 <= out <= 1 assert len(engine._states["-1"]["last_predictions"]) == 1 assert engine._states["-1"]["ongoing"] is False - assert isinstance(engine._states["-1"]["last_predictions"][0][0], Image.Image) + # No raw preds on the forest image: nothing is kept in RAM for that frame + assert engine._states["-1"]["last_predictions"][0][0] is None assert engine._states["-1"]["last_predictions"][0][1].shape[0] == 0 assert engine._states["-1"]["last_predictions"][0][1].shape[1] == 5 assert engine._states["-1"]["last_predictions"][0][3] < datetime.now().isoformat() @@ -47,7 +49,9 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): assert 0 <= out <= 1 assert len(engine._states["-1"]["last_predictions"]) == 2 assert engine._states["-1"]["ongoing"] is False - assert isinstance(engine._states["-1"]["last_predictions"][0][0], Image.Image) + assert engine._states["-1"]["last_predictions"][0][0] is None + # Wildfire frame has raw preds: a compact context crop is kept instead of the full frame + assert isinstance(engine._states["-1"]["last_predictions"][1][0], ContextCrop) assert engine._states["-1"]["last_predictions"][1][1].shape[0] > 0 assert engine._states["-1"]["last_predictions"][1][1].shape[1] == 5 assert engine._states["-1"]["last_predictions"][1][3] < datetime.now().isoformat() @@ -58,7 +62,8 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): assert 0 <= out <= 1 assert len(engine._states["-1"]["last_predictions"]) == 3 assert engine._states["-1"]["ongoing"] - assert isinstance(engine._states["-1"]["last_predictions"][0][0], Image.Image) + assert engine._states["-1"]["last_predictions"][0][0] is None + assert isinstance(engine._states["-1"]["last_predictions"][2][0], ContextCrop) assert engine._states["-1"]["last_predictions"][2][1].shape[0] > 0 assert engine._states["-1"]["last_predictions"][2][1].shape[1] == 5 assert engine._states["-1"]["last_predictions"][2][3] < datetime.now().isoformat() @@ -69,7 +74,8 @@ def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): assert 0 <= out <= 1 assert len(engine._states["-1"]["last_predictions"]) == 4 assert engine._states["-1"]["ongoing"] - assert isinstance(engine._states["-1"]["last_predictions"][0][0], Image.Image) + assert engine._states["-1"]["last_predictions"][0][0] is None + assert isinstance(engine._states["-1"]["last_predictions"][-1][0], ContextCrop) assert engine._states["-1"]["last_predictions"][-1][1].shape[0] > 0 assert engine._states["-1"]["last_predictions"][-1][1].shape[1] == 5 assert len(engine._states["-1"]["last_predictions"][-1][2][0]) == 5 @@ -202,11 +208,14 @@ def test_process_alerts_respects_save_detections_flag(tmp_path, save_detections_ ) # Provide a non-empty bbox list so the API accepts the payload + buf = io.BytesIO() + Image.new("RGB", (8, 8)).save(buf, format="JPEG") engine._stage_alert( - Image.new("RGB", (8, 8)), + None, "dummy_cam", int(time.time()), bboxes=[(0.1, 0.1, 0.2, 0.2, 0.9)], + jpeg_bytes=buf.getvalue(), ) with patch.object(engine, "_local_backup") as mock_backup: @@ -223,7 +232,6 @@ def test_fill_empty_bboxes(tmp_path): and leaves non-empty alerts untouched.""" engine = Engine(cache_folder=str(tmp_path)) - img = Image.new("RGB", (8, 8)) cam_id = "169.254.7.3_3" bboxes_seq = [ [(0.436, 0.609, 0.44, 0.62, 0.089)], @@ -234,7 +242,7 @@ def test_fill_empty_bboxes(tmp_path): [(0.436, 0.609, 0.44, 0.62, 0.389)], ] for i, bboxes in enumerate(bboxes_seq): - engine._stage_alert(img, cam_id, i, bboxes=bboxes) + engine._stage_alert(None, cam_id, i, bboxes=bboxes) engine.fill_empty_bboxes() @@ -250,15 +258,82 @@ def test_fill_empty_bboxes_all_empty_for_cam(tmp_path): """Even when every alert for a cam_id is empty, each one gets the placeholder.""" engine = Engine(cache_folder=str(tmp_path)) - img = Image.new("RGB", (8, 8)) for i in range(3): - engine._stage_alert(img, "169.254.7.3_3", i, bboxes=[]) + engine._stage_alert(None, "169.254.7.3_3", i, bboxes=[]) engine.fill_empty_bboxes() assert all(alert["bboxes"] == [(0.0, 0.0, 0.0001, 0.0001, 0.0)] for alert in engine._alerts) +def test_build_context_crop(tmp_path): + """_build_context_crop keeps a compact JPEG region covering all raw preds, or None without preds.""" + engine = Engine(cache_folder=str(tmp_path)) + frame = Image.new("RGB", (2560, 1440)) + + assert engine._build_context_crop(frame, np.empty((0, 5))) is None + + preds = np.array([[0.4, 0.4, 0.45, 0.45, 0.8]]) + context = engine._build_context_crop(frame, preds) + assert isinstance(context, ContextCrop) + assert (context.full_w, context.full_h) == (2560, 1440) + region = Image.open(io.BytesIO(context.jpeg)) + # Region respects the 1024px floor and contains the pred area + assert min(region.size) >= 1024 + assert context.left <= 0.4 * 2560 + assert context.left + region.size[0] >= 0.45 * 2560 + assert context.top <= 0.4 * 1440 + assert context.top + region.size[1] >= 0.45 * 1440 + # The point of the change: the stored payload is much smaller than the decoded frame + assert len(context.jpeg) < 2560 * 1440 * 3 / 10 + + +def test_cluster_bboxes(): + """Overlapping bboxes merge (transitively); distant ones stay separate.""" + a = (0.10, 0.10, 0.20, 0.20, 0.9) + b = (0.15, 0.15, 0.25, 0.25, 0.8) # overlaps a + c = (0.24, 0.24, 0.30, 0.30, 0.7) # overlaps b only -> same cluster via transitivity + d = (0.80, 0.80, 0.90, 0.90, 0.6) # far away + + clusters = Engine._cluster_bboxes([a, d, b, c]) + + assert len(clusters) == 2 + sizes = sorted(len(members) for members in clusters) + assert sizes == [1, 3] + + +def test_event_crop_boxes_frozen(tmp_path): + """The crop box assigned to a jittering bbox stays identical across frames of one event.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "dummy_cam" + engine._states[cam_key] = engine._new_state() + full_w, full_h = 3840, 2160 + + bbox_t0 = [0.40, 0.40, 0.45, 0.45, 0.8] + engine._update_event_crop_boxes(cam_key, [bbox_t0], full_w, full_h) + assert len(engine._states[cam_key]["event_crop_boxes"]) == 1 + box_t0 = engine._assign_crop_boxes([bbox_t0], cam_key, full_w, full_h)[0] + + # Slightly moved bbox on the next frame: same frozen box, no new one + bbox_t1 = [0.41, 0.39, 0.46, 0.44, 0.7] + engine._update_event_crop_boxes(cam_key, [bbox_t0, bbox_t1], full_w, full_h) + box_t1 = engine._assign_crop_boxes([bbox_t1], cam_key, full_w, full_h)[0] + assert box_t1 == box_t0 + assert len(engine._states[cam_key]["event_crop_boxes"]) == 1 + + # A second detection far away gets its own frozen box, the first one is untouched + bbox_far = [0.80, 0.80, 0.85, 0.85, 0.6] + engine._update_event_crop_boxes(cam_key, [bbox_t1, bbox_far], full_w, full_h) + assert len(engine._states[cam_key]["event_crop_boxes"]) == 2 + boxes = engine._assign_crop_boxes([bbox_t1, bbox_far], cam_key, full_w, full_h) + assert boxes[0] == box_t0 + assert boxes[1] != box_t0 + + # Event over: frozen boxes are reset through state + engine._states[cam_key]["event_crop_boxes"] = [] + assert engine._states[cam_key]["event_crop_boxes"] == [] + + def test_encode_detection_crops_one_per_bbox(tmp_path): """_encode_detection_crops returns one 224x224 JPEG per bbox, aligned by index.""" engine = Engine(cache_folder=str(tmp_path)) @@ -270,8 +345,10 @@ def test_encode_detection_crops_one_per_bbox(tmp_path): (0.05, 0.05, 0.15, 0.15, 0.9), (0.8, 0.7, 0.95, 0.9, 0.5), ] + context = engine._build_context_crop(frame, np.array([list(b) for b in bboxes])) + crop_boxes = [engine._compute_crop_box([b], 1280, 720) for b in bboxes] - crops = engine._encode_detection_crops(frame, bboxes) + crops = engine._encode_detection_crops(context, bboxes, crop_boxes) assert crops is not None assert len(crops) == len(bboxes) @@ -282,7 +359,15 @@ def test_encode_detection_crops_one_per_bbox(tmp_path): # Distant bboxes must yield different crops, not one shared global crop assert crops[0] != crops[1] - assert engine._encode_detection_crops(frame, []) is None + # First crop covers the red-painted region + first = Image.open(io.BytesIO(crops[0])).convert("RGB") + r, g, b = first.getpixel((112, 112)) + assert r > 150 + assert g < 100 + assert b < 100 + + assert engine._encode_detection_crops(context, [], None) is None + assert engine._encode_detection_crops(None, bboxes, crop_boxes) is None def _build_engine_with_pose_stub(tmp_path, init_clock): From 0f59a2aa6e952f939d791971ba68a7f385c2aeda Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 10:00:33 +0200 Subject: [PATCH 03/10] fix(engine): cover square crops for elongated and growing detections Size both context-crop axes on the largest side of the preds union so the square detection crop always fits, re-anchor the frozen box when a bbox grows mostly outside it, and re-square clipped crops to avoid aspect distortion. --- pyroengine/engine.py | 45 +++++++++++++++++++++++++++++++++----------- tests/test_engine.py | 20 +++++++++++++++++--- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 59f06f38..7cb2b308 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -35,6 +35,9 @@ CONTEXT_JPEG_QUALITY = 95 # Padding applied around a bbox cluster for the final 224x224 detection crops. CROP_PADDING = 0.20 +# Fraction of a bbox that must fall inside a frozen event crop box to reuse it; below this +# (e.g. a plume that outgrew its box) a new frozen box is added so the crop re-anchors once. +MIN_BBOX_COVERAGE = 0.8 @dataclass(frozen=True) @@ -390,8 +393,8 @@ def _compute_crop_box( def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional[ContextCrop]: """Encode a generous region around the raw predictions instead of keeping the full frame. - The region is the union of all raw preds expanded to 3x its size per axis (min 1024 px), - so the per-event crop boxes computed later almost always fall inside it. + Both axes are sized on the largest side of the raw preds union, expanded 3x (min 1024 px), + so the square per-event crop boxes computed later almost always fall inside it. """ if preds.shape[0] == 0: return None @@ -402,8 +405,9 @@ def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional x2 = float(arr[:, 2].max()) * img_w y2 = float(arr[:, 3].max()) * img_h - target_w = min(max((x2 - x1) * (1.0 + CONTEXT_PADDING), CONTEXT_MIN_SIDE), img_w) - target_h = min(max((y2 - y1) * (1.0 + CONTEXT_PADDING), CONTEXT_MIN_SIDE), img_h) + side = max(x2 - x1, y2 - y1) * (1.0 + CONTEXT_PADDING) + target_w = min(max(side, CONTEXT_MIN_SIDE), img_w) + target_h = min(max(side, CONTEXT_MIN_SIDE), img_h) cx = (x1 + x2) / 2.0 cy = (y1 + y2) / 2.0 box = self._fit_box( @@ -437,21 +441,32 @@ def _cluster_bboxes(bboxes: list) -> List[list]: def _update_event_crop_boxes(self, cam_key: str, tracked_bboxes: list, full_w: int, full_h: int) -> None: """Add a frozen square crop box for any cluster of tracked bboxes not yet covered. - Existing boxes are never moved or resized, so all crops of one event stay centered - on the same spot regardless of bbox jitter. + Existing boxes are never moved or resized, so all crops of one event stay centered on the + same spot regardless of bbox jitter. A bbox that grew mostly outside its box (coverage below + MIN_BBOX_COVERAGE) gets a new frozen box, so the crop re-anchors once instead of drifting. """ frozen = self._states[cam_key]["event_crop_boxes"] uncovered = [ - bbox for bbox in tracked_bboxes if not any(self._center_in_box(bbox, box, full_w, full_h) for box in frozen) + bbox + for bbox in tracked_bboxes + if not any(self._bbox_coverage(bbox, box, full_w, full_h) >= MIN_BBOX_COVERAGE for box in frozen) ] for cluster in self._cluster_bboxes(uncovered): frozen.append(self._compute_crop_box(cluster, full_w, full_h)) @staticmethod - def _center_in_box(bbox: list, box: Tuple[int, int, int, int], img_w: int, img_h: int) -> bool: - cx = (bbox[0] + bbox[2]) / 2.0 * img_w - cy = (bbox[1] + bbox[3]) / 2.0 * img_h - return box[0] <= cx <= box[2] and box[1] <= cy <= box[3] + def _bbox_coverage(bbox: list, box: Tuple[int, int, int, int], img_w: int, img_h: int) -> float: + """Fraction of the bbox area (normalized coords) covered by the pixel box.""" + bx1, by1 = bbox[0] * img_w, bbox[1] * img_h + bx2, by2 = bbox[2] * img_w, bbox[3] * img_h + inter_w = max(0.0, min(bx2, box[2]) - max(bx1, box[0])) + inter_h = max(0.0, min(by2, box[3]) - max(by1, box[1])) + area = (bx2 - bx1) * (by2 - by1) + if area <= 0: + # Degenerate bbox: covered if its center falls inside the box + cx, cy = (bx1 + bx2) / 2.0, (by1 + by2) / 2.0 + return 1.0 if box[0] <= cx <= box[2] and box[1] <= cy <= box[3] else 0.0 + return inter_w * inter_h / area def _assign_crop_boxes(self, bboxes: list, cam_key: str, full_w: int, full_h: int) -> list: """Pick, for each bbox, the frozen event box with the largest overlap; add one if none overlaps.""" @@ -496,6 +511,14 @@ def _encode_detection_crops( region_h, ) lx1, ly1, lx2, ly2 = (round(v) for v in local) + # A frozen box larger than the stored region gets clipped above; re-square the + # crop on its center so the 224x224 resize never distorts the aspect ratio. + w, h = lx2 - lx1, ly2 - ly1 + if w != h: + side = min(w, h) + lx1 += (w - side) // 2 + ly1 += (h - side) // 2 + lx2, ly2 = lx1 + side, ly1 + side crop = region.crop((lx1, ly1, lx2, ly2)) crop_w, crop_h = crop.size downscaling = crop_w > 224 or crop_h > 224 diff --git a/tests/test_engine.py b/tests/test_engine.py index 00076f74..082d7860 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -287,6 +287,16 @@ def test_build_context_crop(tmp_path): # The point of the change: the stored payload is much smaller than the decoded frame assert len(context.jpeg) < 2560 * 1440 * 3 / 10 + # Elongated bbox: both axes are sized on the largest union side so the square + # detection crop computed later (1.2x that side) always fits inside the region + frame = Image.new("RGB", (3840, 2160)) + preds = np.array([[0.10, 0.45, 0.49, 0.50, 0.8]]) # ~1500x108 px + context = engine._build_context_crop(frame, preds) + region = Image.open(io.BytesIO(context.jpeg)) + crop_box = engine._compute_crop_box(preds.tolist(), 3840, 2160) + assert region.size[1] >= crop_box[3] - crop_box[1] + assert region.size[0] >= crop_box[2] - crop_box[0] + def test_cluster_bboxes(): """Overlapping bboxes merge (transitively); distant ones stay separate.""" @@ -329,9 +339,13 @@ def test_event_crop_boxes_frozen(tmp_path): assert boxes[0] == box_t0 assert boxes[1] != box_t0 - # Event over: frozen boxes are reset through state - engine._states[cam_key]["event_crop_boxes"] = [] - assert engine._states[cam_key]["event_crop_boxes"] == [] + # A plume that outgrew its frozen box re-anchors on a new, larger box + bbox_grown = [0.35, 0.35, 0.50, 0.50, 0.9] + engine._update_event_crop_boxes(cam_key, [bbox_grown, bbox_far], full_w, full_h) + assert len(engine._states[cam_key]["event_crop_boxes"]) == 3 + box_grown = engine._assign_crop_boxes([bbox_grown], cam_key, full_w, full_h)[0] + assert box_grown != box_t0 + assert box_grown[2] - box_grown[0] > box_t0[2] - box_t0[0] def test_encode_detection_crops_one_per_bbox(tmp_path): From a2a34f8f5c22b817cf7bdcd9b82e6e91ea1afe3f Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 10:28:29 +0200 Subject: [PATCH 04/10] fix(engine): cap context crop size with a fixed margin Size the stored region from the final crop box plus a fixed jitter margin instead of 3x the bbox, so a very large detection cannot grow the in-RAM region toward the full frame. --- pyroengine/engine.py | 18 +++++++++++------- tests/test_engine.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 7cb2b308..15908664 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -28,9 +28,11 @@ logging.basicConfig(format="%(asctime)s | %(levelname)s: %(message)s", level=logging.INFO, force=True) logger = logging.getLogger(__name__) -# Context crop kept in RAM instead of the full-resolution frame: the region around the raw -# predictions is expanded by CONTEXT_PADDING (3x the union size), with a floor of CONTEXT_MIN_SIDE px. -CONTEXT_PADDING = 2.0 +# Context crop kept in RAM instead of the full-resolution frame: the region is sized on what the +# final crop needs (the frozen box, CROP_PADDING around the bbox) plus a fixed jitter margin on +# each side, with a floor of CONTEXT_MIN_SIDE px. The margin is fixed (not a multiple of the bbox) +# so a very large detection cannot blow the stored region up toward the full frame. +CONTEXT_MARGIN = 384 CONTEXT_MIN_SIDE = 1024 CONTEXT_JPEG_QUALITY = 95 # Padding applied around a bbox cluster for the final 224x224 detection crops. @@ -391,10 +393,12 @@ def _compute_crop_box( return round(left), round(top), round(right), round(bottom) def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional[ContextCrop]: - """Encode a generous region around the raw predictions instead of keeping the full frame. + """Encode a region around the raw predictions instead of keeping the full frame. - Both axes are sized on the largest side of the raw preds union, expanded 3x (min 1024 px), - so the square per-event crop boxes computed later almost always fall inside it. + Sized on what the final crop needs (the square frozen box, CROP_PADDING around the largest + preds-union side) plus a fixed CONTEXT_MARGIN on each side to absorb bbox jitter between the + frozen box and this frame, with a floor of CONTEXT_MIN_SIDE px. The fixed margin caps the + region size for very large detections instead of scaling it with the bbox. """ if preds.shape[0] == 0: return None @@ -405,7 +409,7 @@ def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional x2 = float(arr[:, 2].max()) * img_w y2 = float(arr[:, 3].max()) * img_h - side = max(x2 - x1, y2 - y1) * (1.0 + CONTEXT_PADDING) + side = max(x2 - x1, y2 - y1) * (1.0 + CROP_PADDING) + 2.0 * CONTEXT_MARGIN target_w = min(max(side, CONTEXT_MIN_SIDE), img_w) target_h = min(max(side, CONTEXT_MIN_SIDE), img_h) cx = (x1 + x2) / 2.0 diff --git a/tests/test_engine.py b/tests/test_engine.py index 082d7860..c2ff9213 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -297,6 +297,16 @@ def test_build_context_crop(tmp_path): assert region.size[1] >= crop_box[3] - crop_box[1] assert region.size[0] >= crop_box[2] - crop_box[0] + # A large bbox is capped to the crop box plus a fixed margin, not blown up toward the + # full frame, so RAM stays bounded. The crop box still fits inside the region. + preds = np.array([[0.30, 0.30, 0.69, 0.69, 0.8]]) # ~1500x842 px square-ish, large + context = engine._build_context_crop(frame, preds) + region = Image.open(io.BytesIO(context.jpeg)) + crop_box = engine._compute_crop_box(preds.tolist(), 3840, 2160) + assert region.size[0] < 3840 # not the full frame width + assert region.size[0] >= crop_box[2] - crop_box[0] + assert region.size[1] >= crop_box[3] - crop_box[1] + def test_cluster_bboxes(): """Overlapping bboxes merge (transitively); distant ones stay separate.""" From 5b12d288f02b61e2fb19b4aa735fe0807b9406d1 Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 10:32:31 +0200 Subject: [PATCH 05/10] feat(engine): cap context-crop RAM by downscaling, keep wide field of view Restore a wide field of view (3x the preds-union side) so the frozen crop box stays inside the stored region even when smoke drifts with the wind. Bound RAM by downscaling the stored pixels above CONTEXT_MAX_SIDE instead of narrowing the view: small regions keep full resolution, large ones are downscaled. Track the region box in full-frame coords so crop extraction maps coordinates through the downscale factor. --- pyroengine/engine.py | 57 +++++++++++++++++++++++++++++++------------- tests/test_engine.py | 30 ++++++++++++----------- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 15908664..5dc6df96 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -28,12 +28,14 @@ logging.basicConfig(format="%(asctime)s | %(levelname)s: %(message)s", level=logging.INFO, force=True) logger = logging.getLogger(__name__) -# Context crop kept in RAM instead of the full-resolution frame: the region is sized on what the -# final crop needs (the frozen box, CROP_PADDING around the bbox) plus a fixed jitter margin on -# each side, with a floor of CONTEXT_MIN_SIDE px. The margin is fixed (not a multiple of the bbox) -# so a very large detection cannot blow the stored region up toward the full frame. -CONTEXT_MARGIN = 384 +# Context crop kept in RAM instead of the full-resolution frame. The region keeps a wide field of +# view (CONTEXT_PADDING -> 3x the preds-union side, floor CONTEXT_MIN_SIDE) so crops stay stable +# even when smoke drifts a lot with the wind. RAM is bounded by downscaling the stored pixels above +# CONTEXT_MAX_SIDE instead of narrowing the field of view: small regions keep full resolution (no +# detail loss), large ones are downscaled, which is fine since the final crop is 224x224 anyway. +CONTEXT_PADDING = 2.0 CONTEXT_MIN_SIDE = 1024 +CONTEXT_MAX_SIDE = 1536 CONTEXT_JPEG_QUALITY = 95 # Padding applied around a bbox cluster for the final 224x224 detection crops. CROP_PADDING = 0.20 @@ -44,11 +46,18 @@ @dataclass(frozen=True) class ContextCrop: - """JPEG-encoded region of a full-resolution frame, with its position and the full frame size.""" + """JPEG region of a full-resolution frame, kept in RAM instead of the whole frame. + + (left, top, right, bottom) is the region's box in full-frame pixel coords. The stored JPEG may + be downscaled below that box size to cap RAM, so coordinates are mapped using the decoded JPEG + size, not the box size. + """ jpeg: bytes left: int top: int + right: int + bottom: int full_w: int full_h: int @@ -395,10 +404,10 @@ def _compute_crop_box( def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional[ContextCrop]: """Encode a region around the raw predictions instead of keeping the full frame. - Sized on what the final crop needs (the square frozen box, CROP_PADDING around the largest - preds-union side) plus a fixed CONTEXT_MARGIN on each side to absorb bbox jitter between the - frozen box and this frame, with a floor of CONTEXT_MIN_SIDE px. The fixed margin caps the - region size for very large detections instead of scaling it with the bbox. + The field of view is wide (3x the largest preds-union side, floor CONTEXT_MIN_SIDE) so the + frozen crop box stays inside it even when smoke drifts with the wind. RAM is bounded by + downscaling the pixels above CONTEXT_MAX_SIDE, not by narrowing the view: small regions keep + full resolution, large ones are downscaled (harmless since the final crop is 224x224). """ if preds.shape[0] == 0: return None @@ -409,7 +418,7 @@ def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional x2 = float(arr[:, 2].max()) * img_w y2 = float(arr[:, 3].max()) * img_h - side = max(x2 - x1, y2 - y1) * (1.0 + CROP_PADDING) + 2.0 * CONTEXT_MARGIN + side = max(x2 - x1, y2 - y1) * (1.0 + CONTEXT_PADDING) target_w = min(max(side, CONTEXT_MIN_SIDE), img_w) target_h = min(max(side, CONTEXT_MIN_SIDE), img_h) cx = (x1 + x2) / 2.0 @@ -418,9 +427,19 @@ def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional (cx - target_w / 2.0, cy - target_h / 2.0, cx + target_w / 2.0, cy + target_h / 2.0), img_w, img_h ) left, top, right, bottom = (round(v) for v in box) + region = frame.crop((left, top, right, bottom)) + longest = max(region.size) + if longest > CONTEXT_MAX_SIDE: + scale = CONTEXT_MAX_SIDE / longest + region = region.resize( + (max(1, round(region.size[0] * scale)), max(1, round(region.size[1] * scale))), + Image.LANCZOS, # type: ignore[attr-defined] + ) buf = io.BytesIO() - frame.crop((left, top, right, bottom)).save(buf, format="JPEG", quality=CONTEXT_JPEG_QUALITY) - return ContextCrop(jpeg=buf.getvalue(), left=left, top=top, full_w=img_w, full_h=img_h) + region.save(buf, format="JPEG", quality=CONTEXT_JPEG_QUALITY) + return ContextCrop( + jpeg=buf.getvalue(), left=left, top=top, right=right, bottom=bottom, full_w=img_w, full_h=img_h + ) @staticmethod def _cluster_bboxes(bboxes: list) -> List[list]: @@ -502,14 +521,18 @@ def _encode_detection_crops( return None region = Image.open(io.BytesIO(context_crop.jpeg)) region_w, region_h = region.size + # The stored region may be downscaled, so map full-frame crop boxes through the actual + # JPEG-to-region scale rather than assuming 1:1 with the full-frame box. + scale_x = region_w / max(context_crop.right - context_crop.left, 1) + scale_y = region_h / max(context_crop.bottom - context_crop.top, 1) crops: list[bytes] = [] for box in crop_boxes: local = self._fit_box( ( - box[0] - context_crop.left, - box[1] - context_crop.top, - box[2] - context_crop.left, - box[3] - context_crop.top, + (box[0] - context_crop.left) * scale_x, + (box[1] - context_crop.top) * scale_y, + (box[2] - context_crop.left) * scale_x, + (box[3] - context_crop.top) * scale_y, ), region_w, region_h, diff --git a/tests/test_engine.py b/tests/test_engine.py index c2ff9213..f75adfbd 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -278,34 +278,36 @@ def test_build_context_crop(tmp_path): assert isinstance(context, ContextCrop) assert (context.full_w, context.full_h) == (2560, 1440) region = Image.open(io.BytesIO(context.jpeg)) - # Region respects the 1024px floor and contains the pred area - assert min(region.size) >= 1024 + # Small region kept at full resolution (no downscale): box size equals JPEG pixel size + assert region.size == (context.right - context.left, context.bottom - context.top) + # Field of view respects the 1024px floor and contains the pred area + assert context.right - context.left >= 1024 assert context.left <= 0.4 * 2560 - assert context.left + region.size[0] >= 0.45 * 2560 + assert context.right >= 0.45 * 2560 assert context.top <= 0.4 * 1440 - assert context.top + region.size[1] >= 0.45 * 1440 + assert context.bottom >= 0.45 * 1440 # The point of the change: the stored payload is much smaller than the decoded frame assert len(context.jpeg) < 2560 * 1440 * 3 / 10 - # Elongated bbox: both axes are sized on the largest union side so the square - # detection crop computed later (1.2x that side) always fits inside the region + # Wide field of view: 3x the largest preds-union side, so the frozen crop box stays + # inside it even when the bbox drifts. The detection crop box fits in full-frame coords. frame = Image.new("RGB", (3840, 2160)) preds = np.array([[0.10, 0.45, 0.49, 0.50, 0.8]]) # ~1500x108 px context = engine._build_context_crop(frame, preds) - region = Image.open(io.BytesIO(context.jpeg)) crop_box = engine._compute_crop_box(preds.tolist(), 3840, 2160) - assert region.size[1] >= crop_box[3] - crop_box[1] - assert region.size[0] >= crop_box[2] - crop_box[0] + assert context.right - context.left >= crop_box[2] - crop_box[0] + assert context.bottom - context.top >= crop_box[3] - crop_box[1] - # A large bbox is capped to the crop box plus a fixed margin, not blown up toward the - # full frame, so RAM stays bounded. The crop box still fits inside the region. + # A large region is bounded by downscaling the pixels, not by narrowing the view: + # the JPEG side is capped while the full-frame box stays wide. preds = np.array([[0.30, 0.30, 0.69, 0.69, 0.8]]) # ~1500x842 px square-ish, large context = engine._build_context_crop(frame, preds) region = Image.open(io.BytesIO(context.jpeg)) + assert max(region.size) <= 1536 # pixels capped (CONTEXT_MAX_SIDE) + assert region.size != (context.right - context.left, context.bottom - context.top) # downscaled crop_box = engine._compute_crop_box(preds.tolist(), 3840, 2160) - assert region.size[0] < 3840 # not the full frame width - assert region.size[0] >= crop_box[2] - crop_box[0] - assert region.size[1] >= crop_box[3] - crop_box[1] + assert context.right - context.left >= crop_box[2] - crop_box[0] + assert context.bottom - context.top >= crop_box[3] - crop_box[1] def test_cluster_bboxes(): From 32a619ae45306943b9561a76dd97707ab4a44a8e Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 10:51:52 +0200 Subject: [PATCH 06/10] fix(engine): gate crop-box reuse on coverage and bound box count Assign a frozen crop box only when it covers the bbox by at least MIN_BBOX_COVERAGE, so a drifted backfilled pred is no longer cropped on a box it merely grazes. Dedupe boxes by IoU before appending so an oversized cluster (uncoverable by a square crop) cannot grow event_crop_boxes on every frame of an event. --- pyroengine/engine.py | 48 +++++++++++++++++++++++++++++++++----------- tests/test_engine.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 12 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 5dc6df96..70fe9aba 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -475,7 +475,27 @@ def _update_event_crop_boxes(self, cam_key: str, tracked_bboxes: list, full_w: i if not any(self._bbox_coverage(bbox, box, full_w, full_h) >= MIN_BBOX_COVERAGE for box in frozen) ] for cluster in self._cluster_bboxes(uncovered): - frozen.append(self._compute_crop_box(cluster, full_w, full_h)) + self._add_crop_box(frozen, self._compute_crop_box(cluster, full_w, full_h)) + + @staticmethod + def _add_crop_box(frozen: list, box: Tuple[int, int, int, int]) -> None: + """Append a frozen box unless a near-identical one already exists. + + A cluster too large to be covered by a square crop stays uncovered every frame and would + otherwise re-append the same capped box forever; the IoU guard bounds the box count. + """ + if not any(Engine._box_iou(box, existing) > 0.9 for existing in frozen): + frozen.append(box) + + @staticmethod + def _box_iou(a: Tuple[int, int, int, int], b: Tuple[int, int, int, int]) -> float: + inter_w = max(0.0, min(a[2], b[2]) - max(a[0], b[0])) + inter_h = max(0.0, min(a[3], b[3]) - max(a[1], b[1])) + inter = inter_w * inter_h + if inter <= 0: + return 0.0 + union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter + return inter / union if union > 0 else 0.0 @staticmethod def _bbox_coverage(bbox: list, box: Tuple[int, int, int, int], img_w: int, img_h: int) -> float: @@ -492,21 +512,25 @@ def _bbox_coverage(bbox: list, box: Tuple[int, int, int, int], img_w: int, img_h return inter_w * inter_h / area def _assign_crop_boxes(self, bboxes: list, cam_key: str, full_w: int, full_h: int) -> list: - """Pick, for each bbox, the frozen event box with the largest overlap; add one if none overlaps.""" + """Pick, for each bbox, the frozen box covering it best; add a fresh one if none covers it enough. + + Gating on coverage (not raw overlap) keeps a drifted bbox from being cropped on an old box it + only grazes: when the best frozen box covers less than MIN_BBOX_COVERAGE, the bbox is given a + box centered on it — unless it is too large for any square crop to cover better. + """ frozen = self._states[cam_key]["event_crop_boxes"] assigned = [] for bbox in bboxes: - bx1, by1 = bbox[0] * full_w, bbox[1] * full_h - bx2, by2 = bbox[2] * full_w, bbox[3] * full_h - best, best_area = None, 0.0 + best, best_cov = None, -1.0 for box in frozen: - inter_w = max(0.0, min(bx2, box[2]) - max(bx1, box[0])) - inter_h = max(0.0, min(by2, box[3]) - max(by1, box[1])) - if inter_w * inter_h > best_area: - best, best_area = box, inter_w * inter_h - if best is None: - best = self._compute_crop_box([bbox], full_w, full_h) - frozen.append(best) + cov = self._bbox_coverage(bbox, box, full_w, full_h) + if cov > best_cov: + best, best_cov = box, cov + if best is None or best_cov < MIN_BBOX_COVERAGE: + fresh = self._compute_crop_box([bbox], full_w, full_h) + if best is None or self._bbox_coverage(bbox, fresh, full_w, full_h) > best_cov: + self._add_crop_box(frozen, fresh) + best = fresh assigned.append(best) return assigned diff --git a/tests/test_engine.py b/tests/test_engine.py index f75adfbd..67ec081f 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -360,6 +360,39 @@ def test_event_crop_boxes_frozen(tmp_path): assert box_grown[2] - box_grown[0] > box_t0[2] - box_t0[0] +def test_assign_crop_boxes_rejects_grazing_box(tmp_path): + """A bbox that only grazes an existing frozen box is given a fresh box, not cropped on the old one.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "dummy_cam" + engine._states[cam_key] = engine._new_state() + full_w, full_h = 3840, 2160 + + bbox = [0.40, 0.40, 0.45, 0.45, 0.8] + engine._update_event_crop_boxes(cam_key, [bbox], full_w, full_h) + box = engine._states[cam_key]["event_crop_boxes"][0] + + # A drifted bbox whose corner just touches `box`: < MIN_BBOX_COVERAGE, so it must not reuse it + drifted = [0.452, 0.452, 0.502, 0.502, 0.5] + assigned = engine._assign_crop_boxes([drifted], cam_key, full_w, full_h)[0] + assert assigned != box + assert engine._bbox_coverage(drifted, assigned, full_w, full_h) >= 0.8 + + +def test_event_crop_boxes_bounded_for_oversized_cluster(tmp_path): + """An oversized cluster (uncoverable by a square crop) does not grow event_crop_boxes forever.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "dummy_cam" + engine._states[cam_key] = engine._new_state() + full_w, full_h = 3840, 2160 + + # Very wide bbox: the square crop is capped to img height, so it cannot cover 80% of the width. + wide = [0.05, 0.48, 0.95, 0.52, 0.8] + for _ in range(10): + engine._update_event_crop_boxes(cam_key, [wide], full_w, full_h) + engine._assign_crop_boxes([wide], cam_key, full_w, full_h) + assert len(engine._states[cam_key]["event_crop_boxes"]) <= 2 + + def test_encode_detection_crops_one_per_bbox(tmp_path): """_encode_detection_crops returns one 224x224 JPEG per bbox, aligned by index.""" engine = Engine(cache_folder=str(tmp_path)) From c3f8ab3bd522f6776ec6fdf49b4fff40643301d8 Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 12:26:20 +0200 Subject: [PATCH 07/10] feat(engine): keep a crop on no-detection frames during an alert A frame with no detection inside an ongoing alert no longer produces a blank/placeholder frame: it builds a context crop of the current frame around the known fire location (the frozen crop boxes) and carries the previous bbox forward at conf 0, so the alert sequence keeps a crop at the same spot instead of a gap. --- pyroengine/engine.py | 67 +++++++++++++++++++++++++++++++------------- tests/test_engine.py | 27 ++++++++++++++++++ 2 files changed, 74 insertions(+), 20 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 2ab5732d..dba2eb11 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -314,6 +314,10 @@ def predict( # Store only a compact JPEG region around the detections so _process_alerts can crop at # full resolution without keeping the whole original frame in RAM. context_crop = self._build_context_crop(original_frame, preds) + if context_crop is None and self._states[cam_key]["ongoing"] and self._states[cam_key]["event_crop_boxes"]: + # No detection this frame but an alert is ongoing: still keep a crop of this frame at the + # known fire location so the alert sequence does not get a blank/placeholder frame. + context_crop = self._context_crop_for_boxes(original_frame, self._states[cam_key]["event_crop_boxes"]) conf = self._update_states(context_crop, preds, cam_key, encoded_bytes=encoded_bytes) if not self._states[cam_key]["ongoing"]: # Event over: drop the frozen crop boxes so the next event re-centers. @@ -341,23 +345,24 @@ def predict( if tracked and full_size is not None: self._update_event_crop_boxes(cam_key, tracked, *full_size) + # Carry the last seen bbox forward onto frames with no detection so the alert keeps a + # crop at the same location instead of a blank/placeholder frame (conf 0 flags the carry). + last_seen: list = [] for idx, (crop_, preds_, bboxes, ts, is_staged, jpeg_bytes) in enumerate(state["last_predictions"]): - if not is_staged: - bboxes = self._backfill_bboxes(bboxes, preds_, tracked_arr) - crop_boxes = ( - self._assign_crop_boxes(bboxes, cam_key, *full_size) - if bboxes and full_size is not None - else None - ) - self._stage_alert(crop_, cam_id, ts, bboxes, jpeg_bytes, crop_boxes) - state["last_predictions"][idx] = ( - crop_, - preds_, - bboxes, - ts, - True, - jpeg_bytes, - ) + if is_staged: + if bboxes: + last_seen = bboxes + continue + bboxes = self._backfill_bboxes(bboxes, preds_, tracked_arr) + if not bboxes and last_seen: + bboxes = [[b[0], b[1], b[2], b[3], 0.0] for b in last_seen] + if bboxes: + last_seen = bboxes + crop_boxes = ( + self._assign_crop_boxes(bboxes, cam_key, *full_size) if bboxes and full_size is not None else None + ) + self._stage_alert(crop_, cam_id, ts, bboxes, jpeg_bytes, crop_boxes) + state["last_predictions"][idx] = (crop_, preds_, bboxes, ts, True, jpeg_bytes) return float(conf) @@ -416,11 +421,33 @@ def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional return None img_w, img_h = frame.size arr = np.asarray(preds, dtype=float) - x1 = float(arr[:, 0].min()) * img_w - y1 = float(arr[:, 1].min()) * img_h - x2 = float(arr[:, 2].max()) * img_w - y2 = float(arr[:, 3].max()) * img_h + union = ( + float(arr[:, 0].min()) * img_w, + float(arr[:, 1].min()) * img_h, + float(arr[:, 2].max()) * img_w, + float(arr[:, 3].max()) * img_h, + ) + return self._encode_context_region(frame, union) + def _context_crop_for_boxes(self, frame: Image.Image, boxes: list) -> Optional[ContextCrop]: + """Context crop around already-known crop boxes (full-frame px), for frames with no detection. + + Lets an ongoing alert keep a crop of the current frame at the known fire location instead of + a blank/placeholder frame, so the alert sequence stays visually continuous. + """ + if not boxes: + return None + union = ( + min(b[0] for b in boxes), + min(b[1] for b in boxes), + max(b[2] for b in boxes), + max(b[3] for b in boxes), + ) + return self._encode_context_region(frame, union) + + def _encode_context_region(self, frame: Image.Image, union_px: Tuple[float, float, float, float]) -> ContextCrop: + img_w, img_h = frame.size + x1, y1, x2, y2 = union_px side = max(x2 - x1, y2 - y1) * (1.0 + CONTEXT_PADDING) target_w = min(max(side, CONTEXT_MIN_SIDE), img_w) target_h = min(max(side, CONTEXT_MIN_SIDE), img_h) diff --git a/tests/test_engine.py b/tests/test_engine.py index 9a9caf18..58fc5569 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -393,6 +393,33 @@ def test_event_crop_boxes_bounded_for_oversized_cluster(tmp_path): assert len(engine._states[cam_key]["event_crop_boxes"]) <= 2 +def test_no_detection_frame_keeps_crop_via_frozen_box(tmp_path): + """A frame with no detection during an alert still yields a crop at the known fire location.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "169.254.7.3_3" + engine._states[cam_key] = engine._new_state() + full_w, full_h = 1280, 720 + + # An earlier frame detected a fire and froze a crop box there. + bbox = [0.45, 0.45, 0.55, 0.55, 0.8] + engine._update_event_crop_boxes(cam_key, [bbox], full_w, full_h) + boxes = engine._states[cam_key]["event_crop_boxes"] + assert boxes + + # Current frame has no detection: a context crop is still built around the frozen box, + # and the carried-forward bbox (conf 0) yields a real 224 crop, not a placeholder. + frame = Image.new("RGB", (full_w, full_h)) + context = engine._context_crop_for_boxes(frame, boxes) + assert isinstance(context, ContextCrop) + + carried = [[bbox[0], bbox[1], bbox[2], bbox[3], 0.0]] + crop_boxes = engine._assign_crop_boxes(carried, cam_key, full_w, full_h) + crops = engine._encode_detection_crops(context, carried, crop_boxes) + assert crops is not None + assert len(crops) == 1 + assert Image.open(io.BytesIO(crops[0])).size == (224, 224) + + def test_encode_detection_crops_one_per_bbox(tmp_path): """_encode_detection_crops returns one 224x224 JPEG per bbox, aligned by index.""" engine = Engine(cache_folder=str(tmp_path)) From 0b75faebf9f3f223fb31b2f1804337416a1153d6 Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 12:32:23 +0200 Subject: [PATCH 08/10] chore(engine): lower detection-crop JPEG quality (95 small / 90 large) --- pyroengine/engine.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index dba2eb11..ec7ec89b 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -42,6 +42,10 @@ CONTEXT_JPEG_QUALITY = 95 # Padding applied around a bbox cluster for the final 224x224 detection crops. CROP_PADDING = 0.20 +# JPEG quality of the uploaded 224x224 detection crop: a bit lower when it was downscaled from a +# larger region, a bit higher for small crops kept near native size (still without chroma subsampling). +CROP_JPEG_QUALITY_LARGE = 90 +CROP_JPEG_QUALITY_SMALL = 95 # Fraction of a bbox that must fall inside a frozen event crop box to reuse it; below this # (e.g. a plume that outgrew its box) a new frozen box is added so the crop re-anchors once. MIN_BBOX_COVERAGE = 0.8 @@ -610,10 +614,10 @@ def _encode_detection_crops( crop = crop.resize((224, 224), Image.LANCZOS) # type: ignore[attr-defined] buf = io.BytesIO() if downscaling: - crop.save(buf, format="JPEG", quality=95) + crop.save(buf, format="JPEG", quality=CROP_JPEG_QUALITY_LARGE) else: - # Crop was at or below 224 — preserve detail with no chroma subsampling. - crop.save(buf, format="JPEG", quality=100, subsampling=0, optimize=True) + # Crop was at or below 224 — keep more detail with no chroma subsampling. + crop.save(buf, format="JPEG", quality=CROP_JPEG_QUALITY_SMALL, subsampling=0, optimize=True) crops.append(buf.getvalue()) return crops From 5b8737e61d0f3f6e2b3846cfd4402350e34b15ab Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 13:40:05 +0200 Subject: [PATCH 09/10] fix(engine): cover frozen fire boxes in the context crop during alerts During an ongoing alert _build_context_crop now folds the frozen event boxes into the covered region, so a frame whose preds are elsewhere (or absent) still stores a crop containing the fire location. Without this a carried-forward bbox could be cropped from the wrong region. Also lower CONTEXT_MAX_SIDE 1536 -> 1024 (still well above the ~560px floor needed to avoid upscaling the 224 crop), roughly halving worst-case crop RAM. --- pyroengine/engine.py | 72 +++++++++++++++++++++++--------------------- tests/test_engine.py | 31 +++++++++++++++++-- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index ec7ec89b..021de860 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -38,7 +38,10 @@ # detail loss), large ones are downscaled, which is fine since the final crop is 224x224 anyway. CONTEXT_PADDING = 2.0 CONTEXT_MIN_SIDE = 1024 -CONTEXT_MAX_SIDE = 1536 +# Pixel cap on the stored region. The final 224x224 crop is cut from the frozen box (~0.4x the +# field of view), so this only needs to stay above ~560 px to avoid upscaling that crop; 1024 keeps +# a ~410 px source for the largest detection while bounding RAM. +CONTEXT_MAX_SIDE = 1024 CONTEXT_JPEG_QUALITY = 95 # Padding applied around a bbox cluster for the final 224x224 detection crops. CROP_PADDING = 0.20 @@ -316,12 +319,12 @@ def predict( logger.info(f"pred for {cam_key} : {preds}") # Store only a compact JPEG region around the detections so _process_alerts can crop at - # full resolution without keeping the whole original frame in RAM. - context_crop = self._build_context_crop(original_frame, preds) - if context_crop is None and self._states[cam_key]["ongoing"] and self._states[cam_key]["event_crop_boxes"]: - # No detection this frame but an alert is ongoing: still keep a crop of this frame at the - # known fire location so the alert sequence does not get a blank/placeholder frame. - context_crop = self._context_crop_for_boxes(original_frame, self._states[cam_key]["event_crop_boxes"]) + # full resolution without keeping the whole original frame in RAM. During an ongoing alert, + # also cover the frozen fire locations so carried-forward / backfilled crops are cut from the + # right place even when this frame's preds are elsewhere or absent. + state = self._states[cam_key] + extra_boxes = state["event_crop_boxes"] if state["ongoing"] else None + context_crop = self._build_context_crop(original_frame, preds, extra_boxes) conf = self._update_states(context_crop, preds, cam_key, encoded_bytes=encoded_bytes) if not self._states[cam_key]["ongoing"]: # Event over: drop the frozen crop boxes so the next event re-centers. @@ -413,39 +416,38 @@ def _compute_crop_box( left, top, right, bottom = Engine._fit_box((cx - half, cy - half, cx + half, cy + half), img_w, img_h) return round(left), round(top), round(right), round(bottom) - def _build_context_crop(self, frame: Image.Image, preds: np.ndarray) -> Optional[ContextCrop]: - """Encode a region around the raw predictions instead of keeping the full frame. + def _build_context_crop( + self, frame: Image.Image, preds: np.ndarray, extra_boxes: Optional[list] = None + ) -> Optional[ContextCrop]: + """Encode a region around the predictions (and any extra px boxes) instead of the full frame. - The field of view is wide (3x the largest preds-union side, floor CONTEXT_MIN_SIDE) so the - frozen crop box stays inside it even when smoke drifts with the wind. RAM is bounded by - downscaling the pixels above CONTEXT_MAX_SIDE, not by narrowing the view: small regions keep - full resolution, large ones are downscaled (harmless since the final crop is 224x224). - """ - if preds.shape[0] == 0: - return None - img_w, img_h = frame.size - arr = np.asarray(preds, dtype=float) - union = ( - float(arr[:, 0].min()) * img_w, - float(arr[:, 1].min()) * img_h, - float(arr[:, 2].max()) * img_w, - float(arr[:, 3].max()) * img_h, - ) - return self._encode_context_region(frame, union) + The field of view is wide (3x the largest covered side, floor CONTEXT_MIN_SIDE) so the frozen + crop box stays inside it even when smoke drifts with the wind. RAM is bounded by downscaling + the pixels above CONTEXT_MAX_SIDE, not by narrowing the view: small regions keep full + resolution, large ones are downscaled (harmless since the final crop is 224x224). - def _context_crop_for_boxes(self, frame: Image.Image, boxes: list) -> Optional[ContextCrop]: - """Context crop around already-known crop boxes (full-frame px), for frames with no detection. - - Lets an ongoing alert keep a crop of the current frame at the known fire location instead of - a blank/placeholder frame, so the alert sequence stays visually continuous. + `extra_boxes` (full-frame px) are folded into the covered region so that, during an ongoing + alert, the frozen fire locations are always inside the stored crop even when this frame's + preds are elsewhere or absent. """ - if not boxes: + img_w, img_h = frame.size + regions: list = [] + if preds.shape[0]: + arr = np.asarray(preds, dtype=float) + regions.append(( + float(arr[:, 0].min()) * img_w, + float(arr[:, 1].min()) * img_h, + float(arr[:, 2].max()) * img_w, + float(arr[:, 3].max()) * img_h, + )) + regions.extend((float(box[0]), float(box[1]), float(box[2]), float(box[3])) for box in extra_boxes or []) + if not regions: return None union = ( - min(b[0] for b in boxes), - min(b[1] for b in boxes), - max(b[2] for b in boxes), - max(b[3] for b in boxes), + min(r[0] for r in regions), + min(r[1] for r in regions), + max(r[2] for r in regions), + max(r[3] for r in regions), ) return self._encode_context_region(frame, union) diff --git a/tests/test_engine.py b/tests/test_engine.py index 58fc5569..e10d46ae 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -12,7 +12,7 @@ from dotenv import load_dotenv from PIL import Image -from pyroengine.engine import PLACEHOLDER_BBOX, ContextCrop, Engine +from pyroengine.engine import CONTEXT_MAX_SIDE, PLACEHOLDER_BBOX, ContextCrop, Engine def test_engine_offline(tmpdir_factory, mock_wildfire_image, mock_forest_image): @@ -303,7 +303,7 @@ def test_build_context_crop(tmp_path): preds = np.array([[0.30, 0.30, 0.69, 0.69, 0.8]]) # ~1500x842 px square-ish, large context = engine._build_context_crop(frame, preds) region = Image.open(io.BytesIO(context.jpeg)) - assert max(region.size) <= 1536 # pixels capped (CONTEXT_MAX_SIDE) + assert max(region.size) <= CONTEXT_MAX_SIDE # pixels capped assert region.size != (context.right - context.left, context.bottom - context.top) # downscaled crop_box = engine._compute_crop_box(preds.tolist(), 3840, 2160) assert context.right - context.left >= crop_box[2] - crop_box[0] @@ -409,7 +409,7 @@ def test_no_detection_frame_keeps_crop_via_frozen_box(tmp_path): # Current frame has no detection: a context crop is still built around the frozen box, # and the carried-forward bbox (conf 0) yields a real 224 crop, not a placeholder. frame = Image.new("RGB", (full_w, full_h)) - context = engine._context_crop_for_boxes(frame, boxes) + context = engine._build_context_crop(frame, np.empty((0, 5)), boxes) assert isinstance(context, ContextCrop) carried = [[bbox[0], bbox[1], bbox[2], bbox[3], 0.0]] @@ -420,6 +420,31 @@ def test_no_detection_frame_keeps_crop_via_frozen_box(tmp_path): assert Image.open(io.BytesIO(crops[0])).size == (224, 224) +def test_context_crop_covers_frozen_box_when_pred_elsewhere(tmp_path): + """During an alert, a pred far from the fire still yields a context crop covering the frozen box.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "169.254.7.3_3" + engine._states[cam_key] = engine._new_state() + full_w, full_h = 1280, 720 + + fire = [0.80, 0.80, 0.88, 0.88, 0.8] + engine._update_event_crop_boxes(cam_key, [fire], full_w, full_h) + frozen = engine._states[cam_key]["event_crop_boxes"] + fire_box = frozen[0] + + # This frame's only raw prediction is in the opposite corner, away from the fire. + far_pred = np.array([[0.05, 0.05, 0.10, 0.10, 0.6]]) + frame = Image.new("RGB", (full_w, full_h)) + context = engine._build_context_crop(frame, far_pred, frozen) + + # The stored region must still fully contain the frozen fire box, so a carried-forward crop + # there is cut from the right place instead of being clipped to the far prediction. + assert context.left <= fire_box[0] + assert context.top <= fire_box[1] + assert context.right >= fire_box[2] + assert context.bottom >= fire_box[3] + + def test_encode_detection_crops_one_per_bbox(tmp_path): """_encode_detection_crops returns one 224x224 JPEG per bbox, aligned by index.""" engine = Engine(cache_folder=str(tmp_path)) From e29027d3b266ced802f12ae6143a1511688142f4 Mon Sep 17 00:00:00 2001 From: Mateo Date: Fri, 12 Jun 2026 15:46:32 +0200 Subject: [PATCH 10/10] fix(engine): clear staged bboxes when an event ends When a new alert starts while the window still holds staged frames from a previous event, carry-forward could seed the old fire location onto gap frames of the new event. _end_event now clears staged frames' bboxes (keeping unstaged lead-up frames), so a previous event's location cannot bleed into the next event's carry-forward or tracked set. --- pyroengine/engine.py | 18 ++++++++++++++++-- tests/test_engine.py | 22 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/pyroengine/engine.py b/pyroengine/engine.py index 021de860..4cec523e 100644 --- a/pyroengine/engine.py +++ b/pyroengine/engine.py @@ -204,6 +204,21 @@ def _new_state(self) -> Dict[str, Any]: state["event_crop_boxes"] = [] return state + def _end_event(self, cam_key: str) -> None: + """Reset per-event staging state when an alert ends. + + Drops the frozen crop boxes so the next event re-centers, and clears the bboxes of + already-staged frames still lingering in the window so a previous event's fire location + cannot seed the next event's carry-forward / tracked set. Unstaged frames (the lead-up to + the next event) keep their bboxes. + """ + state = self._states[cam_key] + state["event_crop_boxes"] = [] + window = state["last_predictions"] + for i, entry in enumerate(window): + if entry[4]: # is_staged: belongs to the event that just ended + window[i] = (entry[0], entry[1], [], entry[3], True, entry[5]) + def heartbeat(self, cam_id: str) -> Response: """Updates last ping of device""" ip = cam_id.split("_")[0] @@ -327,8 +342,7 @@ def predict( context_crop = self._build_context_crop(original_frame, preds, extra_boxes) conf = self._update_states(context_crop, preds, cam_key, encoded_bytes=encoded_bytes) if not self._states[cam_key]["ongoing"]: - # Event over: drop the frozen crop boxes so the next event re-centers. - self._states[cam_key]["event_crop_boxes"] = [] + self._end_event(cam_key) if self.save_captured_frames: self._local_backup(frame, cam_id, is_alert=False, encoded_bytes=encoded_bytes) diff --git a/tests/test_engine.py b/tests/test_engine.py index e10d46ae..b2d03de2 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -445,6 +445,28 @@ def test_context_crop_covers_frozen_box_when_pred_elsewhere(tmp_path): assert context.bottom >= fire_box[3] +def test_end_event_clears_staged_bboxes_only(tmp_path): + """Ending an event drops frozen boxes and staged bboxes, but keeps unstaged lead-up frames.""" + engine = Engine(cache_folder=str(tmp_path)) + cam_key = "169.254.7.3_3" + engine._states[cam_key] = engine._new_state() + state = engine._states[cam_key] + state["event_crop_boxes"] = [(10, 10, 50, 50)] + + staged = (None, np.empty((0, 5)), [[0.8, 0.8, 0.9, 0.9, 0.7]], "t1", True, b"x") + lead_up = (None, np.empty((0, 5)), [[0.1, 0.1, 0.2, 0.2, 0.6]], "t2", False, b"y") + state["last_predictions"].append(staged) + state["last_predictions"].append(lead_up) + + engine._end_event(cam_key) + + assert state["event_crop_boxes"] == [] + # Staged frame from the ended event: bbox cleared so it cannot seed the next event's carry-forward + assert state["last_predictions"][0][2] == [] + # Unstaged lead-up frame: untouched + assert state["last_predictions"][1][2] == [[0.1, 0.1, 0.2, 0.2, 0.6]] + + def test_encode_detection_crops_one_per_bbox(tmp_path): """_encode_detection_crops returns one 224x224 JPEG per bbox, aligned by index.""" engine = Engine(cache_folder=str(tmp_path))