diff --git a/.github/workflows/pr.yml b/.github/workflows/pr.yml index 488fe83cf..2f0348923 100644 --- a/.github/workflows/pr.yml +++ b/.github/workflows/pr.yml @@ -14,24 +14,17 @@ jobs: uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 - - name: Install ruff - run: pip install ruff>=0.9.0 + - name: Install uv + run: python -m pip install uv - name: Format check with ruff - run: ruff format --check . + run: make lint check-changelog: name: Check changelog fragment runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Check for changelog fragment - run: | - FRAGMENTS=$(find changelog.d -type f ! -name '.gitkeep' | wc -l) - if [ "$FRAGMENTS" -eq 0 ]; then - echo "::error::No changelog fragment found in changelog.d/" - echo "Add one with: echo 'Description.' > changelog.d/\$(git branch --show-current).<type>.md" - echo "Types: added, changed, fixed, removed, breaking" - exit 1 - fi + run: make check-changelog test_container_builds: name: Docker runs-on: ubuntu-latest @@ -65,10 +58,12 @@ jobs: credentials_json: ${{ secrets.GCP_SA_KEY }} - name: Wait until policyengine_us version is available on PyPI run: .github/wait-for-pypi.sh - - name: Install dependencies - run: make install + - name: Install uv + run: python -m pip install uv + - name: Bootstrap development environment + run: make bootstrap-dev - name: Run environment variable tests - run: pytest tests/env_variables/test_environment_variables.py + run: make test-env-vars env: POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN }} HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} @@ -96,8 +91,10 @@ jobs: uses: google-github-actions/setup-gcloud@v2 with: project_id: policyengine-api - - name: Install dependencies - run: make install + - name: Install uv + run: python -m pip install uv + - name: Bootstrap development environment + run: make bootstrap-dev - name: Test the API run: make test env: diff --git a/CLAUDE.md b/CLAUDE.md new file 
mode 100644 index 000000000..ab87f9eeb --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,13 @@ +# PolicyEngine API + +## Local workflow + +- In a fresh checkout or worktree, run `make bootstrap-dev`. +- Prefer repo-native commands over hardcoded tool paths. +- Use `make lint`, `make format`, `make check-changelog`, `make pre-pr`, and `make test`. +- Do not invoke tools via paths like `./.venv/bin/ruff` or `./.venv/bin/pytest`. + +## Pull requests + +- Run `make pre-pr` before pushing a PR update. +- Add a changelog fragment in `changelog.d/` using one of: `added`, `changed`, `fixed`, `removed`, `breaking`. diff --git a/Makefile b/Makefile index d8265f9e6..803836ebe 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,52 @@ +PYTHON_VERSION ?= 3.12 +VENV_PYTHON := .venv/bin/python + +.PHONY: bootstrap-dev install debug test-env-vars test debug-test lint format check-changelog pre-pr ci deploy changelog + +bootstrap-dev: + uv venv --seed --python $(PYTHON_VERSION) + .venv/bin/python -m pip install -e ".[dev]" + bash .github/setup_env.sh + install: - pip install -e ".[dev]" + python -m pip install -e ".[dev]" bash .github/setup_env.sh debug: - FLASK_APP=policyengine_api.api FLASK_DEBUG=1 flask run --without-threads + FLASK_APP=policyengine_api.api FLASK_DEBUG=1 $(VENV_PYTHON) -m flask run --without-threads test-env-vars: - pytest tests/env_variables + $(VENV_PYTHON) -m pytest tests/env_variables test: - MAX_HOUSEHOLDS=1000 coverage run -a --branch -m pytest tests/to_refactor tests/unit --disable-pytest-warnings - coverage xml -i + MAX_HOUSEHOLDS=1000 $(VENV_PYTHON) -m coverage run -a --branch -m pytest tests/to_refactor tests/unit --disable-pytest-warnings + $(VENV_PYTHON) -m coverage xml -i debug-test: - MAX_HOUSEHOLDS=1000 FLASK_DEBUG=1 pytest -vv --durations=0 tests + MAX_HOUSEHOLDS=1000 FLASK_DEBUG=1 $(VENV_PYTHON) -m pytest -vv --durations=0 tests + +lint: + uvx --from 'ruff>=0.9.0' ruff format --check . format: - ruff format . + uvx --from 'ruff>=0.9.0' ruff format . 
+ +check-changelog: + @FRAGMENTS=$$(find changelog.d -type f ! -name '.gitkeep' | wc -l); \ + if [ "$$FRAGMENTS" -eq 0 ]; then \ + echo "No changelog fragment found in changelog.d/"; \ + echo "Add one with: echo 'Description.' > changelog.d/$$(git branch --show-current).<type>.md"; \ + echo "Types: added, changed, fixed, removed, breaking"; \ + exit 1; \ + fi + +pre-pr: + $(MAKE) lint + $(MAKE) check-changelog + +ci: + $(MAKE) pre-pr + $(MAKE) test deploy: python gcp/export.py @@ -29,5 +59,5 @@ deploy: rm .dbpw changelog: - python .github/bump_version.py - towncrier build --yes --version $$(python -c "import re; print(re.search(r'version = \"(.+?)\"', open('pyproject.toml').read()).group(1))") + $(VENV_PYTHON) .github/bump_version.py + $(VENV_PYTHON) -m towncrier build --yes --version $$($(VENV_PYTHON) -c "import re; print(re.search(r'version = \"(.+?)\"', open('pyproject.toml').read()).group(1))") diff --git a/README.md b/README.md index 31480e882..cb02a96f4 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ This is the official back-end service of PolicyEngine, a non-profit with the mis # Prerequisites -Running or editing the API locally will require a Python virtual environment, either through the Python `venv` command or a secondary package like `conda`. For more information on how to do this, check out the documentation for `venv` [here](https://docs.python.org/3/library/venv.html) and this overview blog post for `conda` [here](https://uoa-eresearch.github.io/eresearch-cookbook/recipe/2014/11/20/conda/). +Running or editing the API locally requires Python and a virtual environment. Prefer [`uv`](https://docs.astral.sh/uv/) for local setup, especially in fresh worktrees. -Python 3.10 or 3.11 is required. +`make bootstrap-dev` uses Python 3.12 to match CI. # Contributing @@ -26,15 +26,21 @@ git clone https://github.com/PolicyEngine/policyengine-api.git To contribute, clone the repository instead of forking it and then request to be added as a contributor. 
Create a new branch and get started! -### 2. Activate your virtual environment +### 2. Bootstrap a fresh checkout or worktree -### 3. Install dependencies +``` +make bootstrap-dev +``` + +This creates a local virtual environment, installs development dependencies, and copies `.env.example` to `.env` if needed. + +If you already have an activated environment and just need dependencies, you can still run: ``` make install ``` -### 4. Start a server on localhost to see your changes +### 3. Start a server on localhost to see your changes Run: @@ -71,7 +77,7 @@ A simple API get call you can send in Postman to make sure it is working as expe http://127.0.0.1:5001/us/policy/2 ``` -### 5. To test in combination with policyengine-app: +### 4. To test in combination with policyengine-app: 1. In policyengine-app/src/api/call.js, comment out @@ -90,7 +96,7 @@ const POLICYENGINE_API = "http://127.0.0.1:5001" (or the relevant port where the NOTE: Any output that needs to be calculated will not work. Therefore, only household output can be tested with this setup. -### 6. Testing calculations +### 5. Testing calculations To test anything that utilizes Redis or the API's service workers (e.g. anything that requires society-wide calculations with the policy calculator), you'll also need to complete the following steps: @@ -124,40 +130,76 @@ You've finished your contribution, but now what? Before opening a PR, we ask con ### Step 1: Testing -To test your changes against our series of automated tests, run +For the fast PR preflight checks that catch the most common CI failures, run: ``` -make debug-test +make pre-pr ``` -NOTE: Running the command `make test` will fail, as this command is utilized by the deployed app to run tests and requires passwords to the production database. 
+This runs: + +- `make lint` +- `make check-changelog` + +To run the main automated test suite locally, run: + +``` +make test +``` + +This suite mirrors the main CI test job and may require the same credentials and environment variables. + +If you want the broader local test run with verbose output while iterating, run: + +``` +make debug-test +``` We require that you add tests for any new features or bugfixes. Our tests are written in the Python standard, [Pytest](https://docs.pytest.org/en/7.1.x/getting-started.html), and will be run again against the production environment, as well. ### Step 2: Formatting -In addition to the tests, we use [Black](https://github.com/psf/black) to lint our codebase, so before opening a pull request, Step 2 is to lint the code by running +In addition to the tests, we use [Ruff](https://docs.astral.sh/ruff/) for formatting. To auto-format the repo, run: ``` make format ``` -This will automatically format the code for you; no need to do anything else. +To run the same format check used in CI without rewriting files, run: + +``` +make lint +``` ### Step 3: Changelogging -Finally, we ask contributors to make it clear for our users what changes have been made by contributing to a changelog. This changelog is formatted in YAML and describes the changes you've made to the code. This should follow the below format: +Finally, we ask contributors to make it clear for our users what changes have been made by adding a changelog fragment in `changelog.d/`. + +Use one of these suffixes: + +- `added` +- `changed` +- `fixed` +- `removed` +- `breaking` + +For example: + +``` +echo "Honor explicit economy dataset selection in society-wide calculations." 
> changelog.d/my-branch.fixed.md +``` + +To check that a fragment exists before you push, run: ``` -- bump: {major, minor, patch} - changes: - {added, removed, changed, fixed}: - - +make check-changelog ``` -For more info on the syntax, check out the [semantic versioning docs](https://www.semver.org) and [keep a changelog](https://www.keepachangelog.com). +## Fresh Worktree Notes -Write your changelog info into the empty file called `changelog_entry.yaml`. When you open your PR, this will automatically be added to the overall changelog. +- Prefer `make` targets or `python -m ...` commands over hardcoded paths like `./.venv/bin/pytest`. +- In a new worktree, run `make bootstrap-dev` before trying to lint or test. +- If you are unsure what to run before pushing, use `make pre-pr`. ## Opening a Pull Request diff --git a/changelog.d/codex-fix-economy-dataset-selection.fixed.md b/changelog.d/codex-fix-economy-dataset-selection.fixed.md new file mode 100644 index 000000000..02f913057 --- /dev/null +++ b/changelog.d/codex-fix-economy-dataset-selection.fixed.md @@ -0,0 +1 @@ +Honor explicit economy dataset selection in society-wide calculations. diff --git a/policyengine_api/services/economy_service.py b/policyengine_api/services/economy_service.py index 031696286..5a86b2ca2 100644 --- a/policyengine_api/services/economy_service.py +++ b/policyengine_api/services/economy_service.py @@ -11,7 +11,10 @@ ) from policyengine_api.gcp_logging import logger from policyengine_api.libs.simulation_api_modal import simulation_api_modal -from policyengine_api.data.model_setup import get_dataset_version +from policyengine_api.data.model_setup import ( + datasets as configured_datasets, + get_dataset_version, +) from policyengine_api.data.congressional_districts import ( get_valid_state_codes, get_valid_congressional_districts, @@ -534,13 +537,20 @@ def _setup_data( Determine the dataset to use based on the country and region. 
If the dataset is in PASSTHROUGH_DATASETS, it will be passed directly - to the simulation API. Otherwise, uses policyengine's get_default_dataset - to resolve the appropriate GCS path. + to the simulation API. If the dataset matches a configured dataset alias + for the country, resolve it to the published dataset URI. Otherwise, + uses policyengine's get_default_dataset to resolve the appropriate GCS + path. """ # If the dataset is a recognized passthrough keyword, use it directly if dataset in self.PASSTHROUGH_DATASETS: return dataset + # Resolve explicit dataset aliases exposed in metadata. + country_datasets = configured_datasets.get(country_id, {}) + if dataset in country_datasets: + return country_datasets[dataset].removesuffix("@None") + try: return get_default_dataset(country_id, region) except ValueError as e: diff --git a/tests/unit/services/test_economy_service.py b/tests/unit/services/test_economy_service.py index c49783bad..0b71fe59f 100644 --- a/tests/unit/services/test_economy_service.py +++ b/tests/unit/services/test_economy_service.py @@ -811,6 +811,25 @@ def test__given_congressional_district__returns_correct_sim_options( assert sim_options["region"] == "congressional_district/CA-37" assert sim_options["data"] == "gs://policyengine-us-data/districts/CA-37.h5" + def test__given_explicit_dataset__returns_named_dataset(self): + service = EconomyService() + + sim_options_model = service._setup_sim_options( + self.test_country_id, + self.test_reform_policy, + self.test_current_law_baseline_policy, + self.test_region, + self.test_time_period, + self.test_scope, + dataset="enhanced_cps", + ) + + sim_options = sim_options_model.model_dump() + assert ( + sim_options["data"] + == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5" + ) + class TestSetupRegion: """Tests for _setup_region method. 
@@ -972,6 +991,26 @@ def test__given_passthrough_test_dataset__returns_dataset_directly( ) assert result == "national-with-breakdowns-test" + def test__given_explicit_us_enhanced_cps__returns_named_dataset(self): + service = EconomyService() + result = service._setup_data("us", "us", dataset="enhanced_cps") + assert ( + result == "hf://policyengine/policyengine-us-data/enhanced_cps_2024.h5" + ) + + def test__given_explicit_us_cps__returns_named_dataset(self): + service = EconomyService() + result = service._setup_data("us", "us", dataset="cps") + assert result == "hf://policyengine/policyengine-us-data/cps_2023.h5" + + def test__given_explicit_uk_enhanced_frs__returns_named_dataset(self): + service = EconomyService() + result = service._setup_data("uk", "uk", dataset="enhanced_frs") + assert ( + result + == "hf://policyengine/policyengine-uk-data/enhanced_frs_2023_24.h5" + ) + def test__given_default_dataset__uses_get_default_dataset(self): # Test that "default" falls through to get_default_dataset service = EconomyService()