Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/idc_index_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
logger = logging.getLogger(__name__)

__all__ = [
"ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH",
"COLLECTIONS_INDEX_PARQUET_FILEPATH",
"IDC_INDEX_CSV_ARCHIVE_FILEPATH",
"IDC_INDEX_PARQUET_FILEPATH",
"INDEX_METADATA",
Expand Down Expand Up @@ -78,6 +80,12 @@ def _load_text(path: Path | None) -> str | None:
PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
"idc_index_data/prior_versions_index.parquet"
)
COLLECTIONS_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
"idc_index_data/collections_index.parquet"
)
ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH: Path | None = _lookup(
"idc_index_data/analysis_results_index.parquet"
)

# Build unified metadata dictionary for all 7 indices
_ALL_INDICES = [
Expand Down
40 changes: 17 additions & 23 deletions tests/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,27 +53,16 @@ def test_parquet_files_are_bundled():
include parquet files during the build process. These files are required
(not optional) and should always be present after installation.
"""
# Main index parquet file must be present (not optional)
assert m.IDC_INDEX_PARQUET_FILEPATH is not None, (
"idc_index.parquet must be included in the package"
)
assert m.IDC_INDEX_PARQUET_FILEPATH.exists(), (
f"idc_index.parquet not found at {m.IDC_INDEX_PARQUET_FILEPATH}"
)
assert m.IDC_INDEX_PARQUET_FILEPATH.is_file(), (
"idc_index.parquet must be a file, not a directory"
)

# Prior versions index parquet file must be present (not optional)
assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH is not None, (
"prior_versions_index.parquet must be included in the package"
)
assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH.exists(), (
f"prior_versions_index.parquet not found at {m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH}"
)
assert m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH.is_file(), (
"prior_versions_index.parquet must be a file, not a directory"
)
required = {
"idc_index.parquet": m.IDC_INDEX_PARQUET_FILEPATH,
"prior_versions_index.parquet": m.PRIOR_VERSIONS_INDEX_PARQUET_FILEPATH,
"collections_index.parquet": m.COLLECTIONS_INDEX_PARQUET_FILEPATH,
"analysis_results_index.parquet": m.ANALYSIS_RESULTS_INDEX_PARQUET_FILEPATH,
}
for name, path in required.items():
assert path is not None, f"{name} must be included in the package"
assert path.exists(), f"{name} not found at {path}"
assert path.is_file(), f"{name} must be a file, not a directory"


def test_parquet_files_are_readable():
Expand Down Expand Up @@ -274,12 +263,17 @@ def test_index_metadata_consistency():


def test_index_metadata_main_indices_bundled():
"""Test that main indices (idc_index, prior_versions_index) have parquet and schemas.
"""Test that main indices have parquet and schemas.

These are the core indices that should always have parquet, schema and SQL files
bundled in the package when built with default settings.
"""
main_indices = ["idc_index", "prior_versions_index"]
main_indices = [
"idc_index",
"prior_versions_index",
"collections_index",
"analysis_results_index",
]

for index_name in main_indices:
metadata = m.INDEX_METADATA[index_name]
Expand Down
Loading