From 7bcac50c68ff6f18ed2b04124fc5db077ddf9474 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Tue, 19 May 2026 13:21:33 -0400 Subject: [PATCH 1/2] normalize feature cache subdir name; auto-rename legacy dirs jabs-classify and jabs-cli compute-features wrote feature caches to /_pose_est_vN/ while jabs-init and the GUI used //. Strip the _pose_est_vN suffix from the cache subdir via a shared pose_file_stem helper so all paths agree, and rename existing legacy dirs in IdentityFeatures.__init__ (skipped on collision). --- .../jabs-core/src/jabs/core/utils/__init__.py | 9 +- .../src/jabs/core/utils/utilities.py | 21 +++++ packages/jabs-core/tests/test_utilities.py | 52 +++++++++++ src/jabs/feature_extraction/features.py | 36 +++++++- src/jabs/scripts/classify.py | 16 +--- .../test_identity_features.py | 86 ++++++++++++++++++- 6 files changed, 203 insertions(+), 17 deletions(-) create mode 100644 packages/jabs-core/tests/test_utilities.py diff --git a/packages/jabs-core/src/jabs/core/utils/__init__.py b/packages/jabs-core/src/jabs/core/utils/__init__.py index 254bb954..297fc254 100644 --- a/packages/jabs-core/src/jabs/core/utils/__init__.py +++ b/packages/jabs-core/src/jabs/core/utils/__init__.py @@ -1,7 +1,13 @@ """JABS utilities""" from .update_checker import check_for_update, is_pypi_install -from .utilities import get_bool_env_var, hash_file, hide_stderr, to_safe_name +from .utilities import ( + get_bool_env_var, + hash_file, + hide_stderr, + pose_file_stem, + to_safe_name, +) __all__ = [ "check_for_update", @@ -9,5 +15,6 @@ "hash_file", "hide_stderr", "is_pypi_install", + "pose_file_stem", "to_safe_name", ] diff --git a/packages/jabs-core/src/jabs/core/utils/utilities.py b/packages/jabs-core/src/jabs/core/utils/utilities.py index 5b2b7b84..bb64e994 100644 --- a/packages/jabs-core/src/jabs/core/utils/utilities.py +++ b/packages/jabs-core/src/jabs/core/utils/utilities.py @@ -65,6 +65,27 @@ def get_bool_env_var(var_name, default_value=False) -> bool: return value.lower() in ("true", "1", "yes", "on", "y", "t") +_POSE_SUFFIX_RE = re.compile(r"_pose_est_v\d+$") + + +def pose_file_stem(path: str | Path) -> str: + """Return the base name of a pose file with the ``_pose_est_vN`` suffix removed. + + For example, ``"video_pose_est_v6.h5"`` becomes ``"video"``. If the input does + not include the ``_pose_est_vN`` suffix, the stem is returned unchanged + (this allows callers to pass either a pose file path or a video file path + and get a consistent identifier). + + Args: + path: A pose file path (or any path-like value) whose stem may include + the ``_pose_est_vN`` suffix. + + Returns: + The stem with any trailing ``_pose_est_vN`` suffix stripped. + """ + return _POSE_SUFFIX_RE.sub("", Path(path).stem) + + def to_safe_name(behavior: str) -> str: """Create a version of the given behavior name that is safe to use in filenames. diff --git a/packages/jabs-core/tests/test_utilities.py b/packages/jabs-core/tests/test_utilities.py new file mode 100644 index 00000000..8202493f --- /dev/null +++ b/packages/jabs-core/tests/test_utilities.py @@ -0,0 +1,52 @@ +"""Unit tests for jabs.core.utils.utilities module.""" + +from pathlib import Path + +import pytest + +from jabs.core.utils import pose_file_stem + + +@pytest.mark.parametrize( + ("path", "expected"), + [ + ("video_pose_est_v6.h5", "video"), + ("video_pose_est_v2.h5", "video"), + ("video_pose_est_v12.h5", "video"), + ("/some/dir/video_pose_est_v6.h5", "video"), + ("video.mp4", "video"), + ("video.avi", "video"), + ("nested_name_pose_est_v8.h5", "nested_name"), + ("no_suffix.h5", "no_suffix"), + ("plain_name", "plain_name"), + ], + ids=[ + "pose-v6", + "pose-v2", + "pose-v12", + "pose-with-dir", + "video-mp4", + "video-avi", + "nested-underscore-name", + "h5-no-pose-suffix", + "no-extension", + ], +) +def test_pose_file_stem(path: str, expected: str) -> None: + """Pose-file suffix is stripped and other names are returned unchanged.""" + assert pose_file_stem(path) == expected + + +def test_pose_file_stem_accepts_path() -> None: + """``pathlib.Path`` inputs are supported.""" + assert pose_file_stem(Path("/a/b/video_pose_est_v6.h5")) == "video" + + +def test_pose_file_stem_video_and_pose_match() -> None: + """A video file and its matching pose file produce the same stem. + + This is what guarantees feature-cache directories are consistent whether the + caller passes the pose file (jabs-classify, jabs-cli compute-features) or the + video file (jabs-init, GUI). + """ + assert pose_file_stem("video.mp4") == pose_file_stem("video_pose_est_v6.h5") diff --git a/src/jabs/feature_extraction/features.py b/src/jabs/feature_extraction/features.py index 3bdd6d5a..d4332d36 100644 --- a/src/jabs/feature_extraction/features.py +++ b/src/jabs/feature_extraction/features.py @@ -9,6 +9,7 @@ from jabs.core.enums import CacheFormat from jabs.core.exceptions import DistanceScaleException, FeatureVersionException from jabs.core.types import FeatureCacheMetadata, PerFrameCacheData +from jabs.core.utils import pose_file_stem from jabs.io.feature_cache import clear_cache, detect_cache_format from jabs.io.feature_cache.base import FeatureCacheReader, FeatureCacheWriter from jabs.io.feature_cache.hdf5 import HDF5FeatureCacheReader, HDF5FeatureCacheWriter @@ -47,6 +48,33 @@ } +def _migrate_legacy_cache_dir(directory: Path, source_file: str | Path) -> None: + """One-shot rename of a legacy ``_pose_est_vN`` cache dir to ````. + + Earlier versions of jabs-classify and jabs-cli compute-features wrote feature + caches to a subdirectory whose name kept the ``_pose_est_vN`` suffix from the + pose filename, while jabs-init / the GUI used the bare video stem. We now + normalize on the bare stem; this helper renames an existing legacy directory + so previously-computed caches are still found. + + The rename is skipped (and the legacy directory left alone) if any of: + * ``source_file`` does not carry a ``_pose_est_vN`` suffix. + * The legacy directory does not exist. + * The normalized destination already exists (avoids collisions when two + pose files in the same dir would normalize to the same stem). + """ + raw_stem = Path(source_file).stem + normalized_stem = pose_file_stem(source_file) + if raw_stem == normalized_stem: + return + legacy = directory / raw_stem + normalized = directory / normalized_stem + if not legacy.is_dir() or normalized.exists(): + return + logger.info("renaming legacy feature cache subdirectory %s -> %s", legacy, normalized) + legacy.rename(normalized) + + def _normalize_op_settings(settings: dict) -> dict: """Normalize op_settings so all _BASE_FILTERS keys map to ``dict[str, bool]``. @@ -129,7 +157,13 @@ def __init__( if directory is None: self._identity_feature_dir = None else: - base = Path(directory) / Path(source_file).stem + # Normalize the cache subdirectory so paths derived from a pose file + # (e.g. "video_pose_est_v6.h5") match those derived from a video file + # (e.g. "video.mp4"). Without this, jabs-classify / jabs-cli + # compute-features wrote to "/video_pose_est_v6/" while the GUI + # / jabs-init wrote to "/video/". + _migrate_legacy_cache_dir(Path(directory), source_file) + base = Path(directory) / pose_file_stem(source_file) if include_pose_hash: base = base / self._pose_hash self._identity_feature_dir = base / str(self._identity) diff --git a/src/jabs/scripts/classify.py b/src/jabs/scripts/classify.py index d6492eea..42cca99f 100755 --- a/src/jabs/scripts/classify.py +++ b/src/jabs/scripts/classify.py @@ -7,7 +7,6 @@ """ import argparse -import re import sys from pathlib import Path @@ -18,6 +17,7 @@ from jabs.classifier import Classifier from jabs.core.constants import APP_NAME from jabs.core.enums import CacheFormat +from jabs.core.utils import pose_file_stem from jabs.feature_extraction import IdentityFeatures from jabs.pose_estimation import open_pose_file from jabs.project.prediction_manager import PredictionManager @@ -28,18 +28,6 @@ __CLASSIFIER_CHOICES = Classifier().classifier_choices() -def get_pose_stem(pose_path: Path): - """get the stem name of a pose file - - takes a pose path as input and returns the name component with the '_pose_est_v#.h5' suffix removed - """ - m = re.match(r"^(.+)(_pose_est_v[0-9]+\.h5)$", pose_path.name) - if m: - return m.group(1) - else: - raise ValueError(f"{pose_path} is not a valid pose file path") - - def train_and_classify( training_file_path: Path, input_pose_file: Path, @@ -105,7 +93,7 @@ def classify_pose( use_pose_hash (bool, optional): Include pose file hash as a subdirectory in the cache path. Defaults to False. """ pose_est = open_pose_file(input_pose_file) - pose_stem = get_pose_stem(input_pose_file) + pose_stem = pose_file_stem(input_pose_file) # allocate numpy arrays to write to h5 file prediction_labels = np.full((pose_est.num_identities, pose_est.num_frames), -1, dtype=np.int8) diff --git a/tests/feature_extraction/test_identity_features.py b/tests/feature_extraction/test_identity_features.py index 754a16b2..90afecb0 100644 --- a/tests/feature_extraction/test_identity_features.py +++ b/tests/feature_extraction/test_identity_features.py @@ -11,6 +11,7 @@ import jabs.pose_estimation as pose_est_module from jabs.core.enums import CacheFormat +from jabs.core.utils import pose_file_stem from jabs.feature_extraction.features import IdentityFeatures from jabs.io.feature_cache import detect_cache_format @@ -177,7 +178,7 @@ def test_force_with_format_change_removes_stale_sentinel(tmp_path, pose_est_v5) """ # Write an initial Parquet cache. _make_identity_features(pose_est_v5, tmp_path, force=True, cache_format=CacheFormat.PARQUET) - identity_dir = tmp_path / Path(_SOURCE_FILE).stem / str(_IDENTITY) + identity_dir = tmp_path / pose_file_stem(_SOURCE_FILE) / str(_IDENTITY) assert (identity_dir / "metadata.json").exists() # Force-recompute into HDF5 format. @@ -192,3 +193,86 @@ def test_force_with_format_change_removes_stale_sentinel(tmp_path, pose_est_v5) from jabs.io.feature_cache import detect_cache_format assert detect_cache_format(identity_dir) == CacheFormat.HDF5 + + +def test_feature_dir_matches_for_pose_and_video_source(tmp_path, pose_est_v5) -> None: + """Identity feature directory is the same for pose and video source filenames. + + The cache path must not depend on whether the caller passes the pose filename + (jabs-classify / jabs-cli compute-features) or the video filename (jabs-init / + GUI). Both must resolve to the same directory. + """ + pose_source = IdentityFeatures( + source_file="sample_pose_est_v5.h5", + identity=_IDENTITY, + directory=tmp_path, + pose_est=pose_est_v5, + op_settings={}, + ) + video_source = IdentityFeatures( + source_file="sample.mp4", + identity=_IDENTITY, + directory=tmp_path, + pose_est=pose_est_v5, + op_settings={}, + ) + + assert pose_source._identity_feature_dir == video_source._identity_feature_dir + assert pose_source._identity_feature_dir == tmp_path / "sample" / str(_IDENTITY) + + +def test_legacy_cache_dir_is_renamed(tmp_path, pose_est_v5, caplog) -> None: + """A legacy ``_pose_est_vN`` cache dir is renamed to ```` on construction. + + Cached features computed by the previous CLI layout must remain discoverable + after the normalization fix. + """ + legacy = tmp_path / "sample_pose_est_v5" + legacy_identity = legacy / str(_IDENTITY) + legacy_identity.mkdir(parents=True) + sentinel = legacy_identity / "features.h5" + sentinel.touch() + + with caplog.at_level("INFO", logger="jabs.feature_extraction.features"): + instance = _make_identity_features(pose_est_v5, tmp_path, force=False) + + normalized = tmp_path / "sample" + assert not legacy.exists(), "legacy dir should be renamed away" + assert (normalized / str(_IDENTITY) / "features.h5").exists() + assert instance._identity_feature_dir == normalized / str(_IDENTITY) + assert any("renaming legacy feature cache" in r.message for r in caplog.records) + + +def test_legacy_cache_dir_left_alone_on_collision(tmp_path, pose_est_v5) -> None: + """If the normalized destination already exists, the legacy dir is not renamed. + + This protects against collisions when multiple pose files in the same directory + would normalize to the same stem (e.g. ``sample_pose_est_v5.h5`` and + ``sample_pose_est_v6.h5`` both normalize to ``sample``). + """ + legacy = tmp_path / "sample_pose_est_v5" + legacy.mkdir() + (legacy / "marker").touch() + + normalized = tmp_path / "sample" + normalized.mkdir() + (normalized / "other_marker").touch() + + _make_identity_features(pose_est_v5, tmp_path, force=False) + + assert legacy.exists(), "legacy dir must be preserved on collision" + assert (legacy / "marker").exists() + assert (normalized / "other_marker").exists() + + +def test_no_rename_when_video_stem_used(tmp_path, pose_est_v5) -> None: + """If the source filename has no ``_pose_est_vN`` suffix, no rename is attempted.""" + instance = IdentityFeatures( + source_file="sample.mp4", + identity=_IDENTITY, + directory=tmp_path, + pose_est=pose_est_v5, + op_settings={}, + ) + + assert instance._identity_feature_dir == tmp_path / "sample" / str(_IDENTITY) From d2ba628dece840f71a8979de03f579d7d70807c1 Mon Sep 17 00:00:00 2001 From: Glen Beane <356266+gbeane@users.noreply.github.com> Date: Tue, 19 May 2026 14:04:42 -0400 Subject: [PATCH 2/2] address review: harden migration rename, restore pose-name validation - Wrap legacy cache-dir rename in try/except OSError; log a warning and continue so a permissions/filesystem error doesn't abort feature extraction. - Restore the *_pose_est_vN.h5 filename check that the old get_pose_stem provided in jabs-classify, via a new _require_pose_file_name helper. Keeps pose_file_stem as the lenient normalization helper while preserving the CLI's early-error behavior. --- src/jabs/feature_extraction/features.py | 14 ++++++- src/jabs/scripts/classify.py | 15 ++++++++ .../test_identity_features.py | 23 +++++++++++ tests/scripts/test_classify.py | 38 +++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 tests/scripts/test_classify.py diff --git a/src/jabs/feature_extraction/features.py b/src/jabs/feature_extraction/features.py index d4332d36..f4a19ac5 100644 --- a/src/jabs/feature_extraction/features.py +++ b/src/jabs/feature_extraction/features.py @@ -72,7 +72,19 @@ def _migrate_legacy_cache_dir(directory: Path, source_file: str | Path) -> None: if not legacy.is_dir() or normalized.exists(): return logger.info("renaming legacy feature cache subdirectory %s -> %s", legacy, normalized) - legacy.rename(normalized) + try: + legacy.rename(normalized) + except OSError: + # Best-effort migration: a permissions or filesystem error here would + # only cost a cache recompute, so log and continue rather than aborting + # feature extraction. + logger.warning( + "failed to rename legacy feature cache subdirectory %s -> %s; " + "features will be recomputed", + legacy, + normalized, + exc_info=True, + ) def _normalize_op_settings(settings: dict) -> dict: diff --git a/src/jabs/scripts/classify.py b/src/jabs/scripts/classify.py index 42cca99f..6ce8774a 100755 --- a/src/jabs/scripts/classify.py +++ b/src/jabs/scripts/classify.py @@ -7,6 +7,7 @@ """ import argparse +import re import sys from pathlib import Path @@ -24,10 +25,23 @@ DEFAULT_FPS = 30 +_POSE_FILE_NAME_RE = re.compile(r"^.+_pose_est_v[0-9]+\.h5$") + # find out which classifiers are supported in this environment __CLASSIFIER_CHOICES = Classifier().classifier_choices() +def _require_pose_file_name(pose_path: Path) -> None: + """Validate that the filename matches the canonical ``*_pose_est_vN.h5`` pattern. + + Raises: + ValueError: If the filename does not match the canonical pose-file + pattern. + """ + if not _POSE_FILE_NAME_RE.match(pose_path.name): + raise ValueError(f"{pose_path} is not a valid pose file path") + + def train_and_classify( training_file_path: Path, input_pose_file: Path, @@ -92,6 +106,7 @@ def classify_pose( cache_window (bool, optional): Whether to cache window features. Defaults to False. use_pose_hash (bool, optional): Include pose file hash as a subdirectory in the cache path. Defaults to False. """ + _require_pose_file_name(input_pose_file) pose_est = open_pose_file(input_pose_file) pose_stem = pose_file_stem(input_pose_file) diff --git a/tests/feature_extraction/test_identity_features.py b/tests/feature_extraction/test_identity_features.py index 90afecb0..68cae907 100644 --- a/tests/feature_extraction/test_identity_features.py +++ b/tests/feature_extraction/test_identity_features.py @@ -265,6 +265,29 @@ def test_legacy_cache_dir_left_alone_on_collision(tmp_path, pose_est_v5) -> None assert (normalized / "other_marker").exists() +def test_legacy_rename_failure_is_non_fatal(tmp_path, pose_est_v5, caplog, monkeypatch) -> None: + """A rename failure logs a warning and lets construction proceed. + + Best-effort migration: an OS-level rename error must not abort feature + extraction. The worst case is a recomputed cache. + """ + legacy = tmp_path / "sample_pose_est_v5" + legacy.mkdir() + (legacy / "marker").touch() + + def _raise(self, *args, **kwargs): + raise PermissionError("simulated rename failure") + + monkeypatch.setattr(Path, "rename", _raise) + + with caplog.at_level("WARNING", logger="jabs.feature_extraction.features"): + instance = _make_identity_features(pose_est_v5, tmp_path, force=False) + + assert instance._identity_feature_dir == tmp_path / "sample" / str(_IDENTITY) + assert legacy.exists(), "legacy dir untouched after failed rename" + assert any("failed to rename" in r.message for r in caplog.records) + + def test_no_rename_when_video_stem_used(tmp_path, pose_est_v5) -> None: """If the source filename has no ``_pose_est_vN`` suffix, no rename is attempted.""" instance = IdentityFeatures( diff --git a/tests/scripts/test_classify.py b/tests/scripts/test_classify.py new file mode 100644 index 00000000..70a4e6d6 --- /dev/null +++ b/tests/scripts/test_classify.py @@ -0,0 +1,38 @@ +"""Unit tests for jabs.scripts.classify helpers.""" + +from pathlib import Path + +import pytest + +from jabs.scripts.classify import _require_pose_file_name + + +@pytest.mark.parametrize( + "name", + [ + "sample_pose_est_v2.h5", + "sample_pose_est_v6.h5", + "sample_pose_est_v12.h5", + "nested_name_pose_est_v8.h5", + ], +) +def test_require_pose_file_name_accepts_canonical(name: str) -> None: + """Canonical ``*_pose_est_vN.h5`` filenames pass validation.""" + _require_pose_file_name(Path("/some/dir") / name) + + +@pytest.mark.parametrize( + "name", + [ + "sample.mp4", + "sample.h5", + "sample_v6.h5", + "sample_pose_est.h5", + "sample_pose_est_v6.hdf5", + "predictions_pose_est_v6.h5.bak", + ], +) +def test_require_pose_file_name_rejects_invalid(name: str) -> None: + """Names that do not match ``*_pose_est_vN.h5`` are rejected.""" + with pytest.raises(ValueError, match="not a valid pose file path"): + _require_pose_file_name(Path(name))