Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 91 additions & 2 deletions scripts/build_targets/dwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,81 @@ def _fetch_uc_breakdowns() -> list[dict]:
except Exception as e:
logger.warning("Failed to fetch UC housing breakdown: %s", e)

# UC households by monthly payment band — constrains the UC amount distribution
try:
result = _query_table(
_UC_HH_DB,
[_UC_HH_COUNT],
[[f"{_UC_HH_FIELD}:hnpayment_band"]],
)
year = _extract_year(result)
pairs = _extract_breakdown(result)

# Consolidate into wider bands (monthly £ → annual £ for filter ranges).
# Stat-xplore bands are £100-wide from £0 to £2500+. We group into ~£300-400
# bands to keep the target count reasonable while constraining the distribution.
_BAND_GROUPS = [
("0_to_300", 0, 300),
("300_to_600", 300, 600),
("600_to_900", 600, 900),
("900_to_1200", 900, 1200),
("1200_to_1500", 1200, 1500),
("1500_to_2000", 1500, 2000),
("2000_plus", 2000, 999999),
]

# Parse stat-xplore band labels into (lower_monthly, upper_monthly, count)
parsed_bands = []
for label, count in pairs:
low_label = label.lower().strip()
if "no payment" in low_label:
parsed_bands.append((0, 0, count))
elif "or over" in low_label:
# e.g. "£2500.01 or over"
val = float(low_label.split("£")[1].split(" ")[0])
parsed_bands.append((val, 999999, count))
elif " to " in low_label:
parts = low_label.replace("£", "").replace(",", "").split(" to ")
lo = float(parts[0])
hi = float(parts[1])
parsed_bands.append((lo, hi, count))

# Aggregate into grouped bands
for group_name, group_lo, group_hi in _BAND_GROUPS:
group_count = 0.0
for lo, hi, count in parsed_bands:
if lo == 0 and hi == 0:
continue # skip "no payment"
band_mid = (lo + min(hi, 5000)) / 2.0
if group_lo <= band_mid < group_hi:
group_count += count

if group_count > 0:
# Filter range: convert monthly band to annual
annual_lo = group_lo * 12.0
annual_hi = group_hi * 12.0

targets.append(
{
"name": f"dwp/uc_payment_band_{group_name}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "count_nonzero",
"filter": {
"variable": "universal_credit",
"min": annual_lo,
"max": annual_hi,
},
"value": group_count,
"source": "dwp",
"year": year,
"holdout": False,
}
)

except Exception as e:
logger.warning("Failed to fetch UC payment band breakdown: %s", e)

return targets


Expand All @@ -384,10 +459,10 @@ def _fetch_uc_breakdowns() -> list[dict]:
)
DWP_FORECAST_FILE = CACHE_DIR / "dwp_spring_statement_2025.xlsx"

CALIBRATION_YEARS = range(2024, 2030) # 2024/25 through 2029/30
CALIBRATION_YEARS = range(2023, 2030) # 2023/24 through 2029/30

# Column 79 = 2023/24, 80 = 2024/25, ..., 85 = 2029/30 in the DWP forecast xlsx
_FORECAST_COL_TO_YEAR = {80: 2024, 81: 2025, 82: 2026, 83: 2027, 84: 2028, 85: 2029}
_FORECAST_COL_TO_YEAR = {79: 2023, 80: 2024, 81: 2025, 82: 2026, 83: 2027, 84: 2028, 85: 2029}


def _download_forecast() -> Path:
Expand Down Expand Up @@ -530,6 +605,20 @@ def _scale_targets_to_years(
"dwp/uc_households_single_with_children": "universal_credit",
"dwp/uc_households_couple_no_children": "universal_credit",
"dwp/uc_households_couple_with_children": "universal_credit",
# Payment band breakdowns scale with total UC
"dwp/uc_payment_band_0_to_300": "universal_credit",
"dwp/uc_payment_band_300_to_600": "universal_credit",
"dwp/uc_payment_band_600_to_900": "universal_credit",
"dwp/uc_payment_band_900_to_1200": "universal_credit",
"dwp/uc_payment_band_1200_to_1500": "universal_credit",
"dwp/uc_payment_band_1500_to_2000": "universal_credit",
"dwp/uc_payment_band_2000_plus": "universal_credit",
# Age band breakdowns scale with total UC
"dwp/uc_age_16_24": "universal_credit",
"dwp/uc_age_25_34": "universal_credit",
"dwp/uc_age_35_49": "universal_credit",
"dwp/uc_age_50_64": "universal_credit",
"dwp/uc_age_65_plus": "universal_credit",
}

scaled: list[dict] = []
Expand Down
2 changes: 1 addition & 1 deletion scripts/build_targets/hmrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# HMRC SPI 2022-23 collated tables (ODS)
SPI_URL = "https://assets.publishing.service.gov.uk/media/67cabb37ade26736dbf9ffe5/Collated_Tables_3_1_to_3_17_2223.ods"
SPI_YEAR = 2022 # FY 2022-23 → base year for growth indexing
CALIBRATION_YEARS = range(2024, 2031)
CALIBRATION_YEARS = range(2023, 2031)

INCOME_BANDS_LOWER = [
12_570,
Expand Down
93 changes: 75 additions & 18 deletions scripts/build_targets/obr.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,29 +319,30 @@ def _parse_welfare() -> list[dict]:
("State pension", "obr/state_pension", "state_pension", "benunit"),
]

# UC appears twice in 4.9 — inside and outside the welfare cap. We want both.
uc_rows_found = 0
# UC appears twice in 4.9 — inside and outside the welfare cap. Sum them
# into a single total UC spend target since our simulation doesn't
# distinguish the two components.
uc_by_year: dict[int, float] = {}
for row_num in range(6, 50):
val = ws[f"B{row_num}"].value
if val and str(val).strip().startswith("Universal credit"):
uc_rows_found += 1
suffix = "in_cap" if uc_rows_found == 1 else "outside_cap"
values = _read_row(ws, row_num, _WELFARE_COL_TO_YEAR)
for year, value in values.items():
targets.append(
{
"name": f"obr/universal_credit_{suffix}/{year}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "sum",
"filter": None,
"value": value,
"source": "obr",
"year": year,
"holdout": suffix
== "outside_cap", # Only use one UC total for training
}
)
uc_by_year[year] = uc_by_year.get(year, 0.0) + value
for year, value in uc_by_year.items():
targets.append(
{
"name": f"obr/universal_credit_total/{year}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "sum",
"filter": None,
"value": value,
"source": "obr",
"year": year,
"holdout": False,
}
)

for label, name, variable, entity in benefit_rows:
row = _find_row(ws, label)
Expand Down Expand Up @@ -534,6 +535,61 @@ def _parse_economy() -> list[dict]:
return targets


def _backfill_2023(targets: list[dict]) -> list[dict]:
"""Back-extrapolate 2023 targets from 2024 outturn.

The March 2026 EFO's earliest column is 2024/25 outturn. For 2023/24 we
scale backwards using OBR growth rates: earnings growth for tax receipts,
CPI for benefit spending, council tax growth for council tax.
"""
# OBR growth rates for the 2023→2024 transition (from economy tables)
EARNINGS_GROWTH_2024 = 0.0493
CPI_GROWTH_2024 = 0.0253
CT_GROWTH_2024 = 0.051

# Which growth factor to use for each target prefix
_DEFLATOR = {
"obr/income_tax": EARNINGS_GROWTH_2024,
"obr/ni_": EARNINGS_GROWTH_2024,
"obr/vat_": EARNINGS_GROWTH_2024,
"obr/fuel_duty": EARNINGS_GROWTH_2024,
"obr/cgt_": EARNINGS_GROWTH_2024,
"obr/sdlt_": EARNINGS_GROWTH_2024,
"obr/council_tax": CT_GROWTH_2024,
"obr/housing_benefit": CPI_GROWTH_2024,
"obr/pip_dla": CPI_GROWTH_2024,
"obr/attendance_allowance": CPI_GROWTH_2024,
"obr/pension_credit": CPI_GROWTH_2024,
"obr/carers_allowance": CPI_GROWTH_2024,
"obr/child_benefit": CPI_GROWTH_2024,
"obr/state_pension": CPI_GROWTH_2024,
"obr/universal_credit": CPI_GROWTH_2024,
}

existing_2023 = {t["name"] for t in targets if t["year"] == 2023}
extra = []
for t in targets:
if t["year"] != 2024 or t["source"] != "obr":
continue
name_2023 = t["name"].replace("/2024", "/2023")
if name_2023 in existing_2023:
continue
# Find the right deflator
growth = None
for prefix, rate in _DEFLATOR.items():
if t["name"].startswith(prefix):
growth = rate
break
if growth is None:
continue
t2 = dict(t)
t2["name"] = name_2023
t2["year"] = 2023
t2["value"] = t["value"] / (1 + growth)
extra.append(t2)
return targets + extra


def get_targets() -> list[dict]:
targets = []
if RECEIPTS_FILE.exists():
Expand All @@ -544,4 +600,5 @@ def get_targets() -> list[dict]:
targets.extend(_parse_council_tax())
if ECONOMY_FILE.exists():
targets.extend(_parse_economy())
targets = _backfill_2023(targets)
return targets
81 changes: 78 additions & 3 deletions scripts/build_targets/ons.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""ONS demographic calibration targets.

Population by age group, total households, and regional distribution.
These are from ONS mid-year population estimates and household projections.
Population by age group, total households, tenure distribution, and regional
population. From ONS mid-year population estimates, household projections,
and English Housing Survey / census tenure breakdowns.

Sources:
- ONS mid-year population estimates 2023
- ONS household projections
- English Housing Survey / Census 2021 tenure distribution (UK-adjusted)
"""

from __future__ import annotations
Expand Down Expand Up @@ -40,6 +42,34 @@
"northern_ireland": 1_900_000,
}

# Household tenure distribution (UK, ~2023).
# Source: EHS 2022-23 headline report + census 2021 proportions for DA adjustment.
# tenure_type RF codes: 0=OwnedOutright, 1=OwnedWithMortgage, 2=RentFromCouncil,
# 3=RentFromHA, 4=RentPrivately, 5=Other.
# We combine social rent (council + HA) and use 3 broad categories.
_TENURE_HOUSEHOLDS = {
"owned_outright": (0, 0, 8_800_000), # ~31%
"owned_mortgage": (1, 1, 6_600_000), # ~23%
"social_rent": (2, 3, 4_700_000), # ~17% (council + HA)
"private_rent": (4, 4, 4_900_000), # ~17%
}

# Region RF codes matching the Rust enum.
_REGION_RF_CODE = {
"north_east": 0,
"north_west": 1,
"yorkshire": 2,
"east_midlands": 3,
"west_midlands": 4,
"east_of_england": 5,
"london": 6,
"south_east": 7,
"south_west": 8,
"wales": 9,
"scotland": 10,
"northern_ireland": 11,
}


def get_targets() -> list[dict]:
"""Generate ONS demographic targets for all calibration years.
Expand All @@ -51,7 +81,7 @@ def get_targets() -> list[dict]:
targets = []

# Emit for all plausible calibration years
for year in range(2024, 2031):
for year in range(2023, 2031):
# Age group population counts
for group, count in _POPULATION.items():
if group == "total":
Expand Down Expand Up @@ -107,4 +137,49 @@ def get_targets() -> list[dict]:
}
)

# Households by tenure
for tenure_name, (code_lo, code_hi, count) in _TENURE_HOUSEHOLDS.items():
targets.append(
{
"name": f"ons/tenure_{tenure_name}/{year}",
"variable": "household_id",
"entity": "household",
"aggregation": "count",
"filter": {
"variable": "tenure_type",
"min": float(code_lo),
"max": float(code_hi) + 1.0, # exclusive upper bound
},
"value": float(count),
"source": "ons",
"year": year,
"holdout": False,
}
)

# Households by region
for region_name, code in _REGION_RF_CODE.items():
pop = _REGIONAL_POPULATION.get(region_name, 0)
if pop == 0:
continue
# Approximate households from population using national ratio
hh_count = pop * _TOTAL_HOUSEHOLDS / _POPULATION["total"]
targets.append(
{
"name": f"ons/region_{region_name}/{year}",
"variable": "household_id",
"entity": "household",
"aggregation": "count",
"filter": {
"variable": "region",
"min": float(code),
"max": float(code) + 1.0,
},
"value": round(hh_count),
"source": "ons",
"year": year,
"holdout": True, # holdout — approximate conversion
}
)

return targets
Loading
Loading