PolicyEngine · MaxGhenis · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/changelog.d/fix-consumption-weeks-rng.fixed.md b/changelog.d/fix-consumption-weeks-rng.fixed.md
@@ -0,0 +1 @@
+Replace bare `* 52` weekly-to-annual conversion in LCFS imputation with the shared `WEEKS_IN_YEAR = 365.25 / 7` constant used by `datasets/frs.py`, and replace two `np.random.seed(42)` calls with local `np.random.default_rng(42)` so consumption imputation stops mutating the process-wide RNG state.
diff --git a/policyengine_uk_data/datasets/frs.py b/policyengine_uk_data/datasets/frs.py
@@ -28,6 +28,13 @@
 from policyengine_uk_data.parameters import load_take_up_rate, load_parameter
 
 
+# Canonical weeks-per-year conversion factor for annualising weekly survey
+# variables. 365.25 / 7 ≈ 52.1786 accounts for leap years; using the rounded
+# integer 52 would under-count by ~0.34%. Exposed at module level so sibling
+# loaders (e.g. LCFS/ETB in `datasets/imputations/consumption.py`) can import
+# the same value rather than re-defining `* 52` locally and drifting.
+WEEKS_IN_YEAR = 365.25 / 7
+
 LEGACY_JOBSEEKER_MIN_AGE = 18
 HOURS_WORKED_WEEKS_PER_YEAR = 52
 ESA_MIN_AGE = 16

diff --git a/policyengine_uk_data/datasets/imputations/consumption.py b/policyengine_uk_data/datasets/imputations/consumption.py
@@ -25,9 +25,16 @@
 from policyengine_uk_data.storage import STORAGE_FOLDER
 from policyengine_uk.data import UKSingleYearDataset
 from policyengine_uk import Microsimulation
+from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR
 
 LCFS_TAB_FOLDER = STORAGE_FOLDER / "lcfs_2021_22"
 
+# Default seed for the stochastic ICE-vehicle flag drawn from
+# `NTS_2024_ICE_VEHICLE_SHARE`. Kept at 42 for backward compatibility with
+# existing artefact fingerprints; callers can override via the fixture's
+# local RNG rather than the process-wide np.random global.
+_HAS_FUEL_SEED = 42
+
 # EV/ICE vehicle mix from NTS 2024
 NTS_2024_ICE_VEHICLE_SHARE = 0.90
 
@@ -406,9 +413,12 @@ def create_has_fuel_model():
 
     num_vehicles = was["vcarnr7"].fillna(0).clip(lower=0)
     has_vehicle = num_vehicles > 0
-    np.random.seed(42)
+    # Use a local RNG so we don't mutate the global np.random state (which
+    # would silently change any unrelated consumer of np.random that runs
+    # after this function).
+    rng = np.random.default_rng(_HAS_FUEL_SEED)
     has_fuel = (
-        has_vehicle & (np.random.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE)
+        has_vehicle & (rng.random(len(was)) < NTS_2024_ICE_VEHICLE_SHARE)
     ).astype(float)
 
     was_df = pd.DataFrame(
@@ -481,18 +491,21 @@ def generate_lcfs_table(lcfs_person: pd.DataFrame, lcfs_household: pd.DataFrame)
 
     household = household.rename(columns=CONSUMPTION_VARIABLE_RENAMES)
 
-    # Annualise weekly LCFS values (× 52)
+    # Annualise weekly LCFS values. Use the same WEEKS_IN_YEAR constant
+    # (365.25 / 7 ≈ 52.1786) as `datasets/frs.py` rather than a bare `* 52`,
+    # which underestimates annual totals by ~0.34% and skews VAT / energy
+    # imputation targets against FRS income.
     annualise = list(CONSUMPTION_VARIABLE_RENAMES.values()) + [
         "hbai_household_net_income",
         "household_gross_income",
         "electricity_consumption",
         "gas_consumption",
     ]
     for variable in annualise:
-        household[variable] = household[variable] * 52
+        household[variable] = household[variable] * WEEKS_IN_YEAR
     for variable in PERSON_LCF_RENAMES.values():
         household[variable] = (
-            person[variable].groupby(person.case).sum()[household.case] * 52
+            person[variable].groupby(person.case).sum()[household.case] * WEEKS_IN_YEAR
         )
     household.household_weight *= 1_000
 
@@ -577,9 +590,10 @@ def impute_consumption(dataset: UKSingleYearDataset) -> UKSingleYearDataset:
     sim = Microsimulation(dataset=dataset)
     num_vehicles = sim.calculate("num_vehicles", map_to="household").values
 
-    np.random.seed(42)
+    # Local RNG — see note at module level (_HAS_FUEL_SEED).
+    rng = np.random.default_rng(_HAS_FUEL_SEED)
     has_vehicle = num_vehicles > 0
-    is_ice = np.random.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE
+    is_ice = rng.random(len(num_vehicles)) < NTS_2024_ICE_VEHICLE_SHARE
     has_fuel_consumption = (has_vehicle & is_ice).astype(float)
     dataset.household["has_fuel_consumption"] = has_fuel_consumption
 

diff --git a/policyengine_uk_data/tests/test_consumption_weeks_rng.py b/policyengine_uk_data/tests/test_consumption_weeks_rng.py
@@ -0,0 +1,76 @@
+"""Regression tests for `datasets/imputations/consumption.py`.
+
+Covers the two sub-fixes in bug-hunt finding U6:
+
+- Weekly → annual conversion used a bare ``* 52`` while the sibling FRS
+  loader uses ``WEEKS_IN_YEAR = 365.25 / 7 ≈ 52.1786``. The ~0.34% gap
+  skews VAT / energy imputation targets against FRS income.
+- Two ``np.random.seed(42)`` calls mutated the global RNG state, so any
+  unrelated numpy random call after consumption imputation changed output.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+
+import numpy as np
+import pytest
+
+if importlib.util.find_spec("policyengine_uk") is None:
+    pytest.skip(
+        "policyengine_uk not available in test environment",
+        allow_module_level=True,
+    )
+
+
+def test_consumption_imports_shared_weeks_in_year():
+    """consumption.py should use the same WEEKS_IN_YEAR as frs.py."""
+    from policyengine_uk_data.datasets.frs import WEEKS_IN_YEAR as frs_value
+    from policyengine_uk_data.datasets.imputations import (
+        consumption as consumption_module,
+    )
+
+    assert frs_value == 365.25 / 7
+    # Imported at module scope, not re-computed locally with a different value.
+    assert consumption_module.WEEKS_IN_YEAR == frs_value
+
+
+def test_consumption_module_no_longer_calls_np_random_seed():
+    """The fixed module must not reach into the process-wide RNG state.
+
+    The buggy version called `np.random.seed(42)` in two places, which
+    mutated the global RNG and silently changed output for any later
+    numpy random consumer in the same process.
+    """
+    import inspect
+
+    from policyengine_uk_data.datasets.imputations import (
+        consumption as consumption_module,
+    )
+
+    src = inspect.getsource(consumption_module)
+    assert "np.random.seed(" not in src, (
+        "consumption.py still mutates the global RNG state via "
+        "np.random.seed(...) — use np.random.default_rng(seed) instead."
+    )
+
+
+def test_consumption_does_not_perturb_global_rng_on_import():
+    """Importing consumption.py must not change global RNG state.
+
+    Regression against the original bug where running module-level code
+    paths (e.g. during test collection) could have reseeded the global RNG.
+    """
+    np.random.seed(1234)
+    before = np.random.random(3)
+
+    # Force reimport to exercise module-level side effects.
+    import importlib
+
+    from policyengine_uk_data.datasets.imputations import consumption
+
+    importlib.reload(consumption)
+
+    np.random.seed(1234)
+    after = np.random.random(3)
+    np.testing.assert_allclose(before, after)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Replace bare `* 52` weekly-to-annual conversion in LCFS imputation with the shared `WEEKS_IN_YEAR = 365.25 / 7` constant used by `datasets/frs.py`, and replace two `np.random.seed(42)` calls with local `np.random.default_rng(42)` so consumption imputation stops mutating the process-wide RNG state.