From d38438decfe113033247a4bd08d8dad037f4a680 Mon Sep 17 00:00:00 2001 From: Alexander Berger Date: Fri, 5 Dec 2025 15:47:45 -0500 Subject: [PATCH 1/3] Coerce frame interval columns to int before exploding --- pyproject.toml | 2 +- src/jabs_postprocess/compare_gt.py | 4 ++++ uv.lock | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e65533e..e8fdba6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "jabs-postprocess" -version = "0.5.2" +version = "0.5.3" description = "A python library for JABS postprocessing utilities." readme = "README.md" license = "LicenseRef-PLATFORM-LICENSE-AGREEMENT-FOR-NON-COMMERCIAL-USE" diff --git a/src/jabs_postprocess/compare_gt.py b/src/jabs_postprocess/compare_gt.py index 1dfef70..8ef4ea5 100644 --- a/src/jabs_postprocess/compare_gt.py +++ b/src/jabs_postprocess/compare_gt.py @@ -498,6 +498,10 @@ def generate_output_paths(results_folder: Path): def _expand_intervals_to_frames(df): """Expand behavior intervals into per-frame rows.""" expanded = df.copy() + # Ensure integer frame boundaries so range() receives ints even if upstream data was cast to float + for col in ["animal_idx", "start", "duration"]: + if col in expanded.columns: + expanded[col] = pd.to_numeric(expanded[col], errors="coerce").fillna(0).astype(int) expanded["frame"] = expanded.apply( lambda row: range(row["start"], row["start"] + row["duration"]), axis=1 ) diff --git a/uv.lock b/uv.lock index e1d23cd..38b93d0 100644 --- a/uv.lock +++ b/uv.lock @@ -512,7 +512,7 @@ wheels = [ [[package]] name = "jabs-postprocess" -version = "0.5.2" +version = "0.5.3" source = { editable = "." 
} dependencies = [ { name = "black" }, From bf1785ffceb1689bcbf97bf5141f3aadd418ebf5 Mon Sep 17 00:00:00 2001 From: Alexander Berger Date: Wed, 7 Jan 2026 10:30:20 -0500 Subject: [PATCH 2/3] Fix formatting --- src/jabs_postprocess/compare_gt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jabs_postprocess/compare_gt.py b/src/jabs_postprocess/compare_gt.py index 8ef4ea5..4a54b3e 100644 --- a/src/jabs_postprocess/compare_gt.py +++ b/src/jabs_postprocess/compare_gt.py @@ -501,7 +501,9 @@ def _expand_intervals_to_frames(df): # Ensure integer frame boundaries so range() receives ints even if upstream data was cast to float for col in ["animal_idx", "start", "duration"]: if col in expanded.columns: - expanded[col] = pd.to_numeric(expanded[col], errors="coerce").fillna(0).astype(int) + expanded[col] = ( + pd.to_numeric(expanded[col], errors="coerce").fillna(0).astype(int) + ) expanded["frame"] = expanded.apply( lambda row: range(row["start"], row["start"] + row["duration"]), axis=1 ) From c12544237739fe20c0678a1a5ce08b088664a2a3 Mon Sep 17 00:00:00 2001 From: Alexander Berger Date: Wed, 7 Jan 2026 11:04:05 -0500 Subject: [PATCH 3/3] Raise ValueError on NaN values instead of coercing to 0 --- src/jabs_postprocess/compare_gt.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/jabs_postprocess/compare_gt.py b/src/jabs_postprocess/compare_gt.py index 4a54b3e..3cdc47e 100644 --- a/src/jabs_postprocess/compare_gt.py +++ b/src/jabs_postprocess/compare_gt.py @@ -499,11 +499,18 @@ def _expand_intervals_to_frames(df): """Expand behavior intervals into per-frame rows.""" expanded = df.copy() # Ensure integer frame boundaries so range() receives ints even if upstream data was cast to float + # (e.g., when concatenating empty int DataFrames with dict-based DataFrames, pandas upcasts to float) for col in ["animal_idx", "start", "duration"]: if col in expanded.columns: - expanded[col] = ( - pd.to_numeric(expanded[col], 
errors="coerce").fillna(0).astype(int) - ) + # Check for NaN values which indicate data quality issues + if expanded[col].isna().any(): + raise ValueError( + f"Column '{col}' contains NaN values. " + f"Expected valid numeric values for frame interval calculation." + ) + # Convert to int, allowing for float values that can be safely cast + # (e.g., 5.0 -> 5, but 5.5 would truncate to 5) + expanded[col] = expanded[col].astype(int) expanded["frame"] = expanded.apply( lambda row: range(row["start"], row["start"] + row["duration"]), axis=1 )