def predictions_to_poseinterface(
    input_path: Path | str,
    video_path: Path | str,
    output_dir: Path | str,
    *,
    sub_id: str,
    ses_id: str,
    cam_id: str,
) -> Path:
    """Convert a prediction file to ``poseinterface`` COCO JSON format.

    This function reads predictions for a given video and writes the
    corresponding "video-level" COCO JSON labels in the ``poseinterface``
    format (i.e. a
    ``sub-{sub_id}_ses-{ses_id}_cam-{cam_id}_videolabels.json`` file).

    The output JSON file is meant to facilitate the extraction of
    "clip-level" labels (i.e. files of the format
    ``sub-*_ses-*_cam-*_start-*_dur-*_cliplabels.json``).

    Parameters
    ----------
    input_path
        Path to the predictions file. It should be one of the formats
        supported by ``movement`` (see `movement supported formats`_).
    video_path
        Path to the corresponding video file. Used to attach video
        metadata (image width and height) to the COCO output.
    output_dir
        Path to the directory where to save the output JSON file. It is
        created (including parents) if it does not exist.
    sub_id
        Subject ID to include in the generated filenames.
    ses_id
        Session ID to include in the generated filenames.
    cam_id
        Camera ID to include in the generated filenames.

    Returns
    -------
    Path
        Path to the saved COCO JSON file.

    Raises
    ------
    FileNotFoundError
        If ``video_path`` does not point to an existing file.
    ValueError
        If the shape of the loaded video is ``None``.

    Notes
    -----
    For the full list of supported formats for the input file, see
    `movement supported formats`_.

    .. _movement supported formats:
        https://movement.neuroinformatics.dev/dev/user_guide/input_output.html#supported-third-party-formats

    """
    # Validate the video path first, so that a wrong path is reported
    # before the predictions file is loaded
    video_path = Path(video_path)
    if not video_path.is_file():
        raise FileNotFoundError(
            f"Input video file does not exist: {video_path}"
        )

    # Read input file as movement dataset
    # NOTE: fps=None is ignored with NWB files
    ds = load_dataset(
        file=input_path,
        source_software="auto",  # infer from validators
        fps=None,
    )

    # Read video object and get image height and width
    # (video.shape is unpacked as: n_frames, height, width, channels)
    video = sio.load_video(video_path)
    if video.shape is None:
        raise ValueError(
            f"Could not extract video shape from {video_path}."
        )
    _, img_h, img_w, _ = video.shape

    # Convert movement dataset to videolabels dict
    coco_data = _convert_movement_ds_to_videolabels(
        ds,
        sub_id=sub_id,
        ses_id=ses_id,
        cam_id=cam_id,
        img_h=img_h,
        img_w=img_w,
    )

    # Export dict as JSON (create the output directory if required)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_json_path = (
        output_dir / f"sub-{sub_id}_ses-{ses_id}_cam-{cam_id}_videolabels.json"
    )
    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(coco_data, f)

    return output_json_path


def _convert_movement_ds_to_videolabels(
    ds: xr.Dataset,
    *,
    sub_id: str,
    ses_id: str,
    cam_id: str,
    img_w: int,
    img_h: int,
) -> dict[str, list[dict]]:
    """Convert predictions in movement dataset to videolabels dict.

    Parameters
    ----------
    ds
        Dataset with a ``position`` variable that has the dimensions
        ``time``, ``space``, ``keypoints`` and ``individuals``, and a
        ``space`` coordinate that includes ``"x"`` and ``"y"``.
    sub_id
        Subject ID used to build the image file names.
    ses_id
        Session ID used to build the image file names.
    cam_id
        Camera ID used to build the image file names.
    img_w
        Image width, written to every entry of the images list.
    img_h
        Image height, written to every entry of the images list.

    Returns
    -------
    dict
        Dictionary with the keys ``"images"`` (one entry per frame),
        ``"annotations"`` (one entry per frame per individual) and
        ``"categories"`` (one entry per individual).

    """
    # Extract the position array with an explicit axis order and only the
    # x and y components, so that the positional indexing below does not
    # depend on how the dataset happens to be laid out
    positions = (
        ds["position"]
        .sel(space=["x", "y"])
        .transpose("time", "space", "keypoints", "individuals")
        .values
    )  # (time, 2, keypoints, individuals)
    n_frames = positions.shape[0]

    keypoint_names = ds.coords["keypoints"].values.tolist()
    individual_names = ds.coords["individuals"].values.tolist()

    # Build categories list (one entry per individual)
    # NOTE: categories are 1-indexed to avoid conflicts
    # with models that treat category 0 as background.
    categories = [
        {
            "id": category_id,
            "name": name,
            "keypoints": keypoint_names,
            "skeleton": [],
        }
        for category_id, name in enumerate(individual_names, start=1)
    ]

    # Build images list (one entry per frame)
    # NOTE: image id values are always 0-indexed
    # The frame index is zero-padded to the width of the largest index
    # (clamped at 0 so that an empty dataset does not pad to "-1")
    frame_idx_width = len(str(max(n_frames - 1, 0)))
    file_stem = f"sub-{sub_id}_ses-{ses_id}_cam-{cam_id}"
    images = [
        {
            "id": t,
            "file_name": f"{file_stem}_frame-{t:0{frame_idx_width}d}",
            "width": img_w,
            "height": img_h,
        }
        for t in range(n_frames)
    ]

    # Determine kpt visibility for the whole array at once. A keypoint
    # counts as labelled when neither its x nor its y coordinate is NaN.
    # Visibility flags in COCO are:
    #   0: not labeled
    #   1: labeled but not visible (occluded)
    #   2: labeled and visible
    # NOTE: The current code only assigns 0 or 2 because the movement
    # dataset doesn't carry occlusion information
    visible = ~np.isnan(positions).any(axis=1)  # (time, kpts, individuals)

    # Build annotations list (one entry per frame per individual)
    annotations: list[dict] = []
    for t in range(n_frames):
        for i, _ in enumerate(individual_names):
            xy = positions[t, :, :, i].T  # (n_keypoints, 2)
            visible_array = visible[t, :, i]  # (n_keypoints,)
            n_visible = int(visible_array.sum())

            # Compute bbox from visible keypoints
            # (zeros if no keypoints are visible)
            if n_visible > 0:
                xy_visible = xy[visible_array]
                x_min = float(xy_visible[:, 0].min())
                y_min = float(xy_visible[:, 1].min())
                bbox_w = float(xy_visible[:, 0].max()) - x_min
                bbox_h = float(xy_visible[:, 1].max()) - y_min
            else:
                x_min, y_min, bbox_w, bbox_h = 0.0, 0.0, 0.0, 0.0

            annotations.append(
                {
                    # annotation ids are 1-indexed and consecutive
                    "id": len(annotations) + 1,
                    "image_id": t,
                    "category_id": i + 1,
                    "keypoints": coco.encode_keypoints(
                        np.c_[xy, visible_array]
                    ),  # returns flattened kpts [x1, y1, v1, x2, y2, v2, ...]
                    "num_keypoints": n_visible,
                    "bbox": [x_min, y_min, bbox_w, bbox_h],
                    "area": bbox_w * bbox_h,
                    "iscrowd": 0,
                }
            )

    return {
        "images": images,
        "annotations": annotations,
        "categories": categories,
    }
@pytest.fixture
def get_mock_video():
    """Return a factory of mocked Video objects with ``n_frames`` frames.

    The mocked frame size is deliberately not square (image width differs
    from image height), so that tests can detect inadvertent swaps between
    these two values.
    """
    # Keep these two values different: a square frame would hide a
    # width/height swap in the code under test
    frame_height, frame_width = 480, 640

    def _get_mock_video(n_frames):
        mocked = MagicMock()
        mocked.fps = 30
        # shape is (n_frames, height, width, channels)
        mocked.shape = (n_frames, frame_height, frame_width, 3)
        return mocked

    return _get_mock_video
@pytest.fixture
def sample_movement_ds():
    """Build a minimal movement dataset.

    The dataset has 2 frames, 2 keypoints and 1 individual. Both keypoints
    are defined in frame 0; in frame 1 the first keypoint is NaN.
    """
    # Initialise position array with NaN
    # shape: (time, space, keypoints, individuals)
    position_array = np.full((2, 2, 2, 1), np.nan)

    # Fill in frame 0: kpt0=(10, 30), kpt1=(20, 40)
    position_array[0, :, :, 0] = [
        [10.0, 20.0],  # x coordinates
        [30.0, 40.0],  # y coordinates
    ]

    # Fill in frame 1: kpt0=NaN, kpt1=(50, 60)
    position_array[1, :, 1, 0] = [50.0, 60.0]  # x,y

    # Build confidence array
    # shape: (time, keypoints, individuals)
    confidence_array = np.array(
        [
            [
                [0.9],  # kpt0
                [0.8],  # kpt1
            ],  # frame 0
            [
                [np.nan],  # kpt0
                [0.7],  # kpt1
            ],  # frame 1
        ],
        dtype=np.float32,
    )

    # Return dataset
    return xr.Dataset(
        {
            "position": (
                ["time", "space", "keypoints", "individuals"],
                position_array,
            ),
            "confidence": (
                ["time", "keypoints", "individuals"],
                confidence_array,
            ),
        },
        coords={
            "time": [0, 1],
            "space": ["x", "y"],
            "keypoints": ["Nose", "Tail"],
            "individuals": ["id_0"],
        },
    )


# ---------- predictions to poseinterface ----------------


@patch("poseinterface.io._convert_movement_ds_to_videolabels")
@patch("poseinterface.io.sio.load_video")
@patch("poseinterface.io.load_dataset")
def test_predictions_to_poseinterface(
    mock_load_dataset,
    mock_load_video,
    mock_convert,
    sample_movement_ds,
    sub_ses_cam_ids,
    get_mock_video,
    tmp_path,
):
    """Test output path, filename, and saved JSON content."""
    # Get movement dataset
    ds = sample_movement_ds

    # Mock video input files
    fake_video = tmp_path / "foo.mp4"
    fake_video.touch()
    mock_video = get_mock_video(n_frames=3)

    # Pre-define a return value for `_convert_movement_ds_to_videolabels`
    convert_output = {
        "images": [{"id": 0, "file_name": "foo", "width": 10, "height": 20}],
        "annotations": [{"id": 1, "image_id": 0}],
        "categories": [{"id": 1, "name": "mouse"}],
    }

    # Mock return values for supporting functions
    mock_load_dataset.return_value = ds
    mock_load_video.return_value = mock_video
    mock_convert.return_value = convert_output

    # Get expected image width and height
    # shape = (n_frames, img_height, img_width, 3)
    _, expected_h, expected_w, _ = mock_video.shape

    # Convert predictions
    result = predictions_to_poseinterface(
        input_path="fake.csv",
        video_path=fake_video,
        output_dir=tmp_path / "nested" / "out",
        # (use a nested dir from tmp_path to force creation)
        **sub_ses_cam_ids,
    )

    # Check surrounding code correctly routes image height to
    # img_h and image width to img_w when calling the
    # _convert_movement_ds_to_videolabels function
    mock_convert.assert_called_once_with(
        ds, **sub_ses_cam_ids, img_h=expected_h, img_w=expected_w
    )

    # Check output file exists with expected name
    # NOTE: removesuffix is used rather than strip, because strip removes
    # any of the given characters from both ends instead of a suffix
    expected_name = (
        "_".join(
            f"{key.removesuffix('_id')}-{val}"
            for key, val in sub_ses_cam_ids.items()
        )
        + "_videolabels.json"
    )
    assert result.exists()
    assert result.name == expected_name

    # Check output json file contains the mock output from
    # the convert function
    with open(result, encoding="utf-8") as f:
        assert json.load(f) == convert_output


@patch("poseinterface.io.load_dataset")
def test_predictions_to_poseinterface_video_file_missing(
    mock_load_dataset,
    sample_movement_ds,
    sub_ses_cam_ids,
    tmp_path,
):
    """Check FileNotFoundError is raised when the video path does not exist."""
    mock_load_dataset.return_value = sample_movement_ds

    with pytest.raises(
        FileNotFoundError, match="Input video file does not exist"
    ):
        predictions_to_poseinterface(
            input_path="fake.csv",
            video_path=tmp_path / "does_not_exist.mp4",
            output_dir=tmp_path,
            **sub_ses_cam_ids,
        )


@patch("poseinterface.io.sio.load_video")
@patch("poseinterface.io.load_dataset")
def test_predictions_to_poseinterface_video_shape_none(
    mock_load_dataset,
    mock_load_video,
    sample_movement_ds,
    sub_ses_cam_ids,
    tmp_path,
):
    """Check ValueError is raised when the loaded video has shape=None."""
    mock_load_dataset.return_value = sample_movement_ds

    # File exists on disk, but load_video can't read its shape
    fake_video = tmp_path / "unreadable.mp4"
    fake_video.touch()
    mock_load_video.return_value = MagicMock(shape=None)

    with pytest.raises(ValueError, match="Could not extract video shape"):
        predictions_to_poseinterface(
            input_path="fake.csv",
            video_path=fake_video,
            output_dir=tmp_path,
            **sub_ses_cam_ids,
        )


def test_convert_movement_ds_to_videolabels(
    sample_movement_ds,
    sub_ses_cam_ids,
):
    """Test that movement dataset is converted to videolabels dict."""
    ds = sample_movement_ds
    sub_id = sub_ses_cam_ids["sub_id"]
    ses_id = sub_ses_cam_ids["ses_id"]
    cam_id = sub_ses_cam_ids["cam_id"]
    # Use a non-square image size to catch width/height swaps
    img_h, img_w = 480, 640

    coco_data = _convert_movement_ds_to_videolabels(
        ds,
        **sub_ses_cam_ids,
        img_h=img_h,
        img_w=img_w,
    )

    assert set(coco_data.keys()) == {"images", "annotations", "categories"}

    # One image entry per frame; with 2 frames the frame index is
    # formatted with a width of 1
    assert len(coco_data["images"]) == len(ds.time)
    for k, image in enumerate(coco_data["images"]):
        assert image["file_name"] == (
            f"sub-{sub_id}_ses-{ses_id}_cam-{cam_id}_frame-{k:01d}"
        )
        assert image["width"] == img_w
        assert image["height"] == img_h

    # One category per individual
    assert len(coco_data["categories"]) == len(ds.individuals)
    assert coco_data["categories"][0]["name"] == ds.individuals.values[0]
    assert (
        coco_data["categories"][0]["keypoints"] == ds.keypoints.values.tolist()
    )

    # 2 frames x 1 individual = 2 annotations
    assert len(coco_data["annotations"]) == len(ds.time) * len(ds.individuals)

    # Frame 0: both keypoints visible, kpt0=(10, 30), kpt1=(20, 40)
    annot0 = coco_data["annotations"][0]
    assert annot0["num_keypoints"] == 2
    assert annot0["keypoints"] == [
        *ds.position.isel(time=0, keypoints=0, individuals=0).values.tolist(),
        2.0,
        *ds.position.isel(time=0, keypoints=1, individuals=0).values.tolist(),
        2.0,
    ]
    # bbox: [xmin, ymin, width, height]
    assert annot0["bbox"] == [10.0, 30.0, 10.0, 10.0]
    assert annot0["area"] == 100.0

    # Frame 1: kpt0 is NaN, kpt1=(50, 60)
    annot1 = coco_data["annotations"][1]
    assert annot1["num_keypoints"] == 1
    assert annot1["keypoints"] == [
        0.0,
        0.0,
        0.0,
        *ds.position.isel(time=1, keypoints=1, individuals=0).values.tolist(),
        2.0,
    ]
    # bbox covers only the single visible keypoint
    assert annot1["bbox"] == [50.0, 60.0, 0.0, 0.0]
    assert annot1["area"] == 0.0