From a99049081bd37161bb9e32a6591455039ceb443e Mon Sep 17 00:00:00 2001 From: Andrey Fedorov Date: Fri, 1 May 2026 17:56:48 -0400 Subject: [PATCH] ENH: bundle collections_index and analysis_results_index in package Add named path exports for collections_index and analysis_results_index to match the pattern of idc_index and prior_versions_index. Extend test_parquet_files_are_bundled and test_index_metadata_main_indices_bundled to enforce that all four core indexes are always present in the installed package. Co-Authored-By: Claude Sonnet 4.6 --- src/idc_index_data/__init__.py | 8 +++++++ tests/test_package.py | 40 +++++++++++++++------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/idc_index_data/__init__.py b/src/idc_index_data/__init__.py index 25226ac..05b5a48 100644 --- a/src/idc_index_data/__init__.py +++ b/src/idc_index_data/__init__.py @@ -16,6 +16,8 @@ logger = logging.getLogger(__name__) __all__ = [ + "ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH", + "COLLECTIONS_INDEX_PARQUET_FILEPATH", "IDC_INDEX_CSV_ARCHIVE_FILEPATH", "IDC_INDEX_PARQUET_FILEPATH", "INDEX_METADATA", @@ -78,6 +80,12 @@ def _load_text(path: Path | None) -> str | None: PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH: Path | None = _lookup( "idc_index_data/prior_versions_index.parquet" ) +COLLECTIONS_INDEX_PARQUET_FILEPATH: Path | None = _lookup( + "idc_index_data/collections_index.parquet" +) +ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH: Path | None = _lookup( + "idc_index_data/analysis_results_index.parquet" +) # Build unified metadata dictionary for all 7 indices _ALL_INDICES = [ diff --git a/tests/test_package.py b/tests/test_package.py index 12c97a7..44f6ea3 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -53,27 +53,16 @@ def test_parquet_files_are_bundled(): include parquet files during the build process. These files are required (not optional) and should always be present after installation. """ - # Main index parquet file must be present (not optional) - assert m.IDC_INDEX_PARQUET_FILEPATH is not None, ( - "idc_index.parquet must be included in the package" - ) - assert m.IDC_INDEX_PARQUET_FILEPATH.exists(), ( - f"idc_index.parquet not found at {m.IDC_INDEX_PARQUET_FILEPATH}" - ) - assert m.IDC_INDEX_PARQUET_FILEPATH.is_file(), ( - "idc_index.parquet must be a file, not a directory" - ) - - # Prior versions index parquet file must be present (not optional) - assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH is not None, ( - "prior_versions_index.parquet must be included in the package" - ) - assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH.exists(), ( - f"prior_versions_index.parquet not found at {m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH}" - ) - assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH.is_file(), ( - "prior_versions_index.parquet must be a file, not a directory" - ) + required = { + "idc_index.parquet": m.IDC_INDEX_PARQUET_FILEPATH, + "prior_versions_index.parquet": m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH, + "collections_index.parquet": m.COLLECTIONS_INDEX_PARQUET_FILEPATH, + "analysis_results_index.parquet": m.ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH, + } + for name, path in required.items(): + assert path is not None, f"{name} must be included in the package" + assert path.exists(), f"{name} not found at {path}" + assert path.is_file(), f"{name} must be a file, not a directory" def test_parquet_files_are_readable(): @@ -274,12 +263,17 @@ def test_index_metadata_consistency(): def test_index_metadata_main_indices_bundled(): - """Test that main indices (idc_index, prior_versions_index) have parquet and schemas. + """Test that main indices have parquet and schemas. These are the core indices that should always have parquet, schema and SQL files bundled in the package when built with default settings. """ - main_indices = ["idc_index", "prior_versions_index"] + main_indices = [ + "idc_index", + "prior_versions_index", + "collections_index", + "analysis_results_index", + ] for index_name in main_indices: metadata = m.INDEX_METADATA[index_name]