Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 91 additions & 2 deletions scripts/build_targets/dwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,81 @@ def _fetch_uc_breakdowns() -> list[dict]:
except Exception as e:
logger.warning("Failed to fetch UC housing breakdown: %s", e)

# UC households by monthly payment band — constrains the UC amount distribution
try:
result = _query_table(
_UC_HH_DB,
[_UC_HH_COUNT],
[[f"{_UC_HH_FIELD}:hnpayment_band"]],
)
year = _extract_year(result)
pairs = _extract_breakdown(result)

# Consolidate into wider bands (monthly £ → annual £ for filter ranges).
# Stat-xplore bands are £100-wide from £0 to £2500+. We group into ~£300-400
# bands to keep the target count reasonable while constraining the distribution.
_BAND_GROUPS = [
("0_to_300", 0, 300),
("300_to_600", 300, 600),
("600_to_900", 600, 900),
("900_to_1200", 900, 1200),
("1200_to_1500", 1200, 1500),
("1500_to_2000", 1500, 2000),
("2000_plus", 2000, 999999),
]

# Parse stat-xplore band labels into (lower_monthly, upper_monthly, count)
parsed_bands = []
for label, count in pairs:
low_label = label.lower().strip()
if "no payment" in low_label:
parsed_bands.append((0, 0, count))
elif "or over" in low_label:
# e.g. "£2500.01 or over"
val = float(low_label.split("£")[1].split(" ")[0])
parsed_bands.append((val, 999999, count))
elif " to " in low_label:
parts = low_label.replace("£", "").replace(",", "").split(" to ")
lo = float(parts[0])
hi = float(parts[1])
parsed_bands.append((lo, hi, count))

# Aggregate into grouped bands
for group_name, group_lo, group_hi in _BAND_GROUPS:
group_count = 0.0
for lo, hi, count in parsed_bands:
if lo == 0 and hi == 0:
continue # skip "no payment"
band_mid = (lo + min(hi, 5000)) / 2.0
if group_lo <= band_mid < group_hi:
group_count += count

if group_count > 0:
# Filter range: convert monthly band to annual
annual_lo = group_lo * 12.0
annual_hi = group_hi * 12.0

targets.append(
{
"name": f"dwp/uc_payment_band_{group_name}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "count_nonzero",
"filter": {
"variable": "universal_credit",
"min": annual_lo,
"max": annual_hi,
},
"value": group_count,
"source": "dwp",
"year": year,
"holdout": False,
}
)

except Exception as e:
logger.warning("Failed to fetch UC payment band breakdown: %s", e)

return targets


Expand All @@ -384,10 +459,10 @@ def _fetch_uc_breakdowns() -> list[dict]:
)
DWP_FORECAST_FILE = CACHE_DIR / "dwp_spring_statement_2025.xlsx"

CALIBRATION_YEARS = range(2024, 2030) # 2024/25 through 2029/30
CALIBRATION_YEARS = range(2023, 2030) # 2023/24 through 2029/30

# Column 79 = 2023/24, 80 = 2024/25, ..., 85 = 2029/30 in the DWP forecast xlsx
_FORECAST_COL_TO_YEAR = {80: 2024, 81: 2025, 82: 2026, 83: 2027, 84: 2028, 85: 2029}
_FORECAST_COL_TO_YEAR = {79: 2023, 80: 2024, 81: 2025, 82: 2026, 83: 2027, 84: 2028, 85: 2029}


def _download_forecast() -> Path:
Expand Down Expand Up @@ -530,6 +605,20 @@ def _scale_targets_to_years(
"dwp/uc_households_single_with_children": "universal_credit",
"dwp/uc_households_couple_no_children": "universal_credit",
"dwp/uc_households_couple_with_children": "universal_credit",
# Payment band breakdowns scale with total UC
"dwp/uc_payment_band_0_to_300": "universal_credit",
"dwp/uc_payment_band_300_to_600": "universal_credit",
"dwp/uc_payment_band_600_to_900": "universal_credit",
"dwp/uc_payment_band_900_to_1200": "universal_credit",
"dwp/uc_payment_band_1200_to_1500": "universal_credit",
"dwp/uc_payment_band_1500_to_2000": "universal_credit",
"dwp/uc_payment_band_2000_plus": "universal_credit",
# Age band breakdowns scale with total UC
"dwp/uc_age_16_24": "universal_credit",
"dwp/uc_age_25_34": "universal_credit",
"dwp/uc_age_35_49": "universal_credit",
"dwp/uc_age_50_64": "universal_credit",
"dwp/uc_age_65_plus": "universal_credit",
}

scaled: list[dict] = []
Expand Down
2 changes: 1 addition & 1 deletion scripts/build_targets/hmrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
# HMRC SPI 2022-23 collated tables (ODS)
SPI_URL = "https://assets.publishing.service.gov.uk/media/67cabb37ade26736dbf9ffe5/Collated_Tables_3_1_to_3_17_2223.ods"
SPI_YEAR = 2022 # FY 2022-23 → base year for growth indexing
CALIBRATION_YEARS = range(2024, 2031)
CALIBRATION_YEARS = range(2023, 2031)

INCOME_BANDS_LOWER = [
12_570,
Expand Down
93 changes: 75 additions & 18 deletions scripts/build_targets/obr.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,29 +319,30 @@ def _parse_welfare() -> list[dict]:
("State pension", "obr/state_pension", "state_pension", "benunit"),
]

# UC appears twice in 4.9 — inside and outside the welfare cap. We want both.
uc_rows_found = 0
# UC appears twice in 4.9 — inside and outside the welfare cap. Sum them
# into a single total UC spend target since our simulation doesn't
# distinguish the two components.
uc_by_year: dict[int, float] = {}
for row_num in range(6, 50):
val = ws[f"B{row_num}"].value
if val and str(val).strip().startswith("Universal credit"):
uc_rows_found += 1
suffix = "in_cap" if uc_rows_found == 1 else "outside_cap"
values = _read_row(ws, row_num, _WELFARE_COL_TO_YEAR)
for year, value in values.items():
targets.append(
{
"name": f"obr/universal_credit_{suffix}/{year}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "sum",
"filter": None,
"value": value,
"source": "obr",
"year": year,
"holdout": suffix
== "outside_cap", # Only use one UC total for training
}
)
uc_by_year[year] = uc_by_year.get(year, 0.0) + value
for year, value in uc_by_year.items():
targets.append(
{
"name": f"obr/universal_credit_total/{year}",
"variable": "universal_credit",
"entity": "benunit",
"aggregation": "sum",
"filter": None,
"value": value,
"source": "obr",
"year": year,
"holdout": False,
}
)

for label, name, variable, entity in benefit_rows:
row = _find_row(ws, label)
Expand Down Expand Up @@ -534,6 +535,61 @@ def _parse_economy() -> list[dict]:
return targets


def _backfill_2023(targets: list[dict]) -> list[dict]:
"""Back-extrapolate 2023 targets from 2024 outturn.

The March 2026 EFO's earliest column is 2024/25 outturn. For 2023/24 we
scale backwards using OBR growth rates: earnings growth for tax receipts,
CPI for benefit spending, council tax growth for council tax.
"""
# OBR growth rates for the 2023→2024 transition (from economy tables)
EARNINGS_GROWTH_2024 = 0.0493
CPI_GROWTH_2024 = 0.0253
CT_GROWTH_2024 = 0.051

# Which growth factor to use for each target prefix
_DEFLATOR = {
"obr/income_tax": EARNINGS_GROWTH_2024,
"obr/ni_": EARNINGS_GROWTH_2024,
"obr/vat_": EARNINGS_GROWTH_2024,
"obr/fuel_duty": EARNINGS_GROWTH_2024,
"obr/cgt_": EARNINGS_GROWTH_2024,
"obr/sdlt_": EARNINGS_GROWTH_2024,
"obr/council_tax": CT_GROWTH_2024,
"obr/housing_benefit": CPI_GROWTH_2024,
"obr/pip_dla": CPI_GROWTH_2024,
"obr/attendance_allowance": CPI_GROWTH_2024,
"obr/pension_credit": CPI_GROWTH_2024,
"obr/carers_allowance": CPI_GROWTH_2024,
"obr/child_benefit": CPI_GROWTH_2024,
"obr/state_pension": CPI_GROWTH_2024,
"obr/universal_credit": CPI_GROWTH_2024,
}

existing_2023 = {t["name"] for t in targets if t["year"] == 2023}
extra = []
for t in targets:
if t["year"] != 2024 or t["source"] != "obr":
continue
name_2023 = t["name"].replace("/2024", "/2023")
if name_2023 in existing_2023:
continue
# Find the right deflator
growth = None
for prefix, rate in _DEFLATOR.items():
if t["name"].startswith(prefix):
growth = rate
break
if growth is None:
continue
t2 = dict(t)
t2["name"] = name_2023
t2["year"] = 2023
t2["value"] = t["value"] / (1 + growth)
extra.append(t2)
return targets + extra


def get_targets() -> list[dict]:
targets = []
if RECEIPTS_FILE.exists():
Expand All @@ -544,4 +600,5 @@ def get_targets() -> list[dict]:
targets.extend(_parse_council_tax())
if ECONOMY_FILE.exists():
targets.extend(_parse_economy())
targets = _backfill_2023(targets)
return targets
81 changes: 78 additions & 3 deletions scripts/build_targets/ons.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""ONS demographic calibration targets.

Population by age group, total households, and regional distribution.
These are from ONS mid-year population estimates and household projections.
Population by age group, total households, tenure distribution, and regional
population. From ONS mid-year population estimates, household projections,
and English Housing Survey / census tenure breakdowns.

Sources:
- ONS mid-year population estimates 2023
- ONS household projections
- English Housing Survey / Census 2021 tenure distribution (UK-adjusted)
"""

from __future__ import annotations
Expand Down Expand Up @@ -40,6 +42,34 @@
"northern_ireland": 1_900_000,
}

# Household tenure distribution (UK, ~2023).
# Source: EHS 2022-23 headline report + census 2021 proportions for DA adjustment.
# tenure_type RF codes: 0=OwnedOutright, 1=OwnedWithMortgage, 2=RentFromCouncil,
# 3=RentFromHA, 4=RentPrivately, 5=Other.
# We combine social rent (council + HA) and use 3 broad categories.
_TENURE_HOUSEHOLDS = {
"owned_outright": (0, 0, 8_800_000), # ~31%
"owned_mortgage": (1, 1, 6_600_000), # ~23%
"social_rent": (2, 3, 4_700_000), # ~17% (council + HA)
"private_rent": (4, 4, 4_900_000), # ~17%
}

# Region RF codes matching the Rust enum.
_REGION_RF_CODE = {
"north_east": 0,
"north_west": 1,
"yorkshire": 2,
"east_midlands": 3,
"west_midlands": 4,
"east_of_england": 5,
"london": 6,
"south_east": 7,
"south_west": 8,
"wales": 9,
"scotland": 10,
"northern_ireland": 11,
}


def get_targets() -> list[dict]:
"""Generate ONS demographic targets for all calibration years.
Expand All @@ -51,7 +81,7 @@ def get_targets() -> list[dict]:
targets = []

# Emit for all plausible calibration years
for year in range(2024, 2031):
for year in range(2023, 2031):
# Age group population counts
for group, count in _POPULATION.items():
if group == "total":
Expand Down Expand Up @@ -107,4 +137,49 @@ def get_targets() -> list[dict]:
}
)

# Households by tenure
for tenure_name, (code_lo, code_hi, count) in _TENURE_HOUSEHOLDS.items():
targets.append(
{
"name": f"ons/tenure_{tenure_name}/{year}",
"variable": "household_id",
"entity": "household",
"aggregation": "count",
"filter": {
"variable": "tenure_type",
"min": float(code_lo),
"max": float(code_hi) + 1.0, # exclusive upper bound
},
"value": float(count),
"source": "ons",
"year": year,
"holdout": False,
}
)

# Households by region
for region_name, code in _REGION_RF_CODE.items():
pop = _REGIONAL_POPULATION.get(region_name, 0)
if pop == 0:
continue
# Approximate households from population using national ratio
hh_count = pop * _TOTAL_HOUSEHOLDS / _POPULATION["total"]
targets.append(
{
"name": f"ons/region_{region_name}/{year}",
"variable": "household_id",
"entity": "household",
"aggregation": "count",
"filter": {
"variable": "region",
"min": float(code),
"max": float(code) + 1.0,
},
"value": round(hh_count),
"source": "ons",
"year": year,
"holdout": True, # holdout — approximate conversion
}
)

return targets
Loading
Loading