Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/fix-income-zero-guard.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Guard the rent/mortgage rescaling in `impute_over_incomes` against `ZeroDivisionError` when the seed dataset's imputation columns sum to zero (e.g. the zero-weight synthetic copy in `impute_income`).
17 changes: 16 additions & 1 deletion policyengine_uk_data/datasets/imputations/income.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,21 @@ def generate_spi_table(spi: pd.DataFrame):
INCOME_MODEL_PATH = STORAGE_FOLDER / "income.pkl"


def _safe_rescale_factor(original: float, new: float) -> float:
"""Return the rent/mortgage rescaling factor used after income imputation.

Guards against a degenerate input where the seed dataset's imputation
columns sum to zero (e.g. the zero-weight synthetic copy used in
``impute_income`` before incomes have been populated). In that case we
cannot compute a meaningful ratio, so leave housing costs untouched
(factor=1.0) rather than raising ``ZeroDivisionError`` or silently
propagating NaN / inf into downstream household tables.
"""
if original == 0:
return 1.0
return new / original


def save_imputation_models():
"""
Train and save income imputation model.
Expand Down Expand Up @@ -190,7 +205,7 @@ def impute_over_incomes(
dataset.person[column] = output_df[column].fillna(0).values

new_income_total = dataset.person[INCOME_COMPONENTS].sum().sum()
adjustment_factor = new_income_total / original_income_total
adjustment_factor = _safe_rescale_factor(original_income_total, new_income_total)
# Adjust rent and mortgage interest and capital repayments proportionally
dataset.household["rent"] = dataset.household["rent"] * adjustment_factor
dataset.household["mortgage_interest_repayment"] = (
Expand Down
45 changes: 45 additions & 0 deletions policyengine_uk_data/tests/test_income_rescale_factor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Unit tests for the rent/mortgage rescale factor helper in income.py.

Guards the zero-division bug reported in the bug hunt (finding U3):
`impute_over_incomes` computed ``new_income_total / original_income_total``
with no check for the degenerate case where the seed dataset had zero in
every imputation column — which is exactly the shape of the
`zero_weight_copy` branch inside `impute_income`.
"""

from __future__ import annotations

import math

import pytest


def test_safe_rescale_factor_with_zero_original_returns_one():
    """A zero baseline must produce the neutral factor 1.0."""
    from policyengine_uk_data.datasets.imputations.income import (
        _safe_rescale_factor as rescale,
    )

    # Regression: an unguarded division by a zero baseline used to raise
    # ZeroDivisionError (or yield inf). With no baseline to compare against,
    # the helper should leave housing costs unscaled.
    zero_baseline_cases = (
        (0, 123_456),
        (0.0, 0.0),
    )
    for baseline, imputed in zero_baseline_cases:
        assert rescale(baseline, imputed) == 1.0


def test_safe_rescale_factor_with_nonzero_original_returns_ratio():
    """A non-zero baseline must produce the plain ``new / original`` ratio."""
    from policyengine_uk_data.datasets.imputations.income import (
        _safe_rescale_factor as rescale,
    )

    # Incomes grew by a factor of 2.5.
    grown = rescale(1_000.0, 2_500.0)
    assert grown == pytest.approx(2.5)

    # Unchanged incomes give the identity factor.
    unchanged = rescale(42.0, 42.0)
    assert unchanged == pytest.approx(1.0)


def test_safe_rescale_factor_preserves_finiteness():
    """Ordinary non-zero baselines must always yield a finite factor."""
    from policyengine_uk_data.datasets.imputations.income import (
        _safe_rescale_factor as rescale,
    )

    # (baseline, imputed) pairs: large totals, tiny totals, and a zero
    # imputed total over a non-zero baseline.
    nonzero_baseline_cases = [
        (1e9, 2e9),
        (1e-6, 1e-9),
        (100.0, 0.0),
    ]
    for baseline, imputed in nonzero_baseline_cases:
        result = rescale(baseline, imputed)
        assert math.isfinite(result), (baseline, imputed, result)
Loading