def _discover_cd_areas():
    """Return the CD codes that currently have a weight file on disk.

    Codes are derived from the ``*_tmd_weights.csv.gz`` file names in
    CD_WEIGHT_DIR, upper-cased and sorted.  An empty list is returned
    when the directory does not exist.
    """
    suffix = "_tmd_weights.csv.gz"
    found = []
    if CD_WEIGHT_DIR.exists():
        for weight_file in CD_WEIGHT_DIR.glob("*" + suffix):
            found.append(weight_file.name.replace(suffix, "").upper())
        found.sort()
    return found


def _run_fingerprint_test(scope, areas, weight_dir, update):
    """Compare the current fingerprint for *scope* with the saved one.

    The fingerprint is always recomputed first.  In update mode, or
    when no reference exists yet, it is saved and the test is skipped.
    Otherwise the area count and the weight hash must both match.
    """
    fresh = _compute_fingerprint(areas, weight_dir)

    if update:
        saved_to = _save_fingerprint(scope, fresh)
        pytest.skip(f"Saved to {saved_to} — re-run to test")

    baseline = _load_fingerprint(scope)
    if baseline is None:
        saved_to = _save_fingerprint(scope, fresh)
        pytest.skip(f"No reference found. Saved to {saved_to} — re-run")

    ref_n, cur_n = baseline["n_areas"], fresh["n_areas"]
    assert cur_n == ref_n, f"Area count: {ref_n} -> {cur_n}"

    hashes_agree = fresh["weight_hash"] == baseline["weight_hash"]
    assert hashes_agree, "Weight hash mismatch — results changed"


def _run_detail_test(scope, areas, weight_dir, update):
    """Report which reference areas differ from the current weights.

    Skipped in update mode and when there is no reference fingerprint.
    Only areas listed in the reference are examined: each one must be
    present in the current fingerprint with the same integer sum.
    """
    if update:
        pytest.skip("Update mode")

    baseline = _load_fingerprint(scope)
    if baseline is None:
        pytest.skip("No reference fingerprint")

    fresh = _compute_fingerprint(areas, weight_dir)
    expected = baseline.get("per_area_int_sums", {})
    actual = fresh.get("per_area_int_sums", {})

    mismatches = []
    for area in sorted(expected):
        if area not in actual:
            mismatches.append(f"{area}: missing")
        elif expected[area] != actual[area]:
            mismatches.append(f"{area}: {expected[area]} -> {actual[area]}")

    assert not mismatches, f"{len(mismatches)} areas changed:\n" + "\n".join(
        mismatches
    )


# --- State tests ---


@pytest.mark.skipif(
    not _has_weight_files(STATE_WEIGHT_DIR, ALL_STATES),
    reason="No state weight files — run solve_weights first",
)
class TestStatesFingerprint:
    """Regression checks on the solved state weight files."""

    # pylint: disable=redefined-outer-name
    def test_state_weights_match_reference(self, update_mode):
        """Overall state fingerprint equals the saved reference."""
        _run_fingerprint_test(
            scope="states",
            areas=ALL_STATES,
            weight_dir=STATE_WEIGHT_DIR,
            update=update_mode,
        )

    def test_state_per_area_sums_match(self, update_mode):
        """Name the individual states whose integer sums changed."""
        _run_detail_test(
            scope="states",
            areas=ALL_STATES,
            weight_dir=STATE_WEIGHT_DIR,
            update=update_mode,
        )


# --- CD tests ---

# Resolved once at import time so the skipif decorator below and both
# tests work from the same list of districts.
_CD_AREAS = _discover_cd_areas()


@pytest.mark.skipif(
    not _has_weight_files(CD_WEIGHT_DIR, _CD_AREAS),
    reason="No CD weight files — run solve_weights --scope cds first",
)
class TestCdsFingerprint:
    """Regression checks on the solved congressional district weights."""

    # pylint: disable=redefined-outer-name
    def test_cds_weights_match_reference(self, update_mode):
        """Overall CD fingerprint equals the saved reference."""
        _run_fingerprint_test(
            scope="cds",
            areas=_CD_AREAS,
            weight_dir=CD_WEIGHT_DIR,
            update=update_mode,
        )

    def test_cds_per_area_sums_match(self, update_mode):
        """Name the individual CDs whose integer sums changed."""
        _run_detail_test(
            scope="cds",
            areas=_CD_AREAS,
            weight_dir=CD_WEIGHT_DIR,
            update=update_mode,
        )
def test_no_duplicate_shares(self, shares_data):
    """No (area, variable, count, fstatus, agistub) key repeats.

    Rows whose ``stabbr`` is in _EXCLUDE are ignored; every remaining
    key combination must occur exactly once in the shares table.
    """
    key_columns = ["stabbr", "basesoivname", "count", "fstatus", "agistub"]
    is_excluded = shares_data["stabbr"].isin(_EXCLUDE)
    rows_per_key = shares_data[~is_excluded].groupby(key_columns).size()
    dupes = rows_per_key[rows_per_key > 1]
    n_dupes = len(dupes)
    assert n_dupes == 0, (
        f"Found {n_dupes} duplicate share groups. "
        f"First few: {dupes.head(5).to_dict()}"
    )
The optimizer adjusts `x[i]` away from 1.0 only as much as +needed to match area-specific targets within tolerance. + +### Targets and constraints + +Each **target** is a weighted sum we want the area to match — for +example, "total wages in the $50K-$75K AGI bin for Alabama CD-1 +should be $2.01 billion." The constraint matrix `B` encodes which +records contribute to each target and by how much. + +Targets come from IRS Statistics of Income (SOI) data, which +publishes geographic breakdowns of income, deductions, and credits. +We scale SOI geographic proportions by TMD national totals to get +area-level targets that sum exactly to national TMD values. + +### Why this formulation works + +We chose quadratic programming because: + +1. **Sensible default:** `x[i] = 1` means "this record gets its + population-proportional share." The optimizer only departs from + this when the data requires it. +2. **Robust:** The quadratic objective + linear constraints always + has a unique solution (given feasibility), making results + reproducible across machines. +3. **Fast:** The Clarabel QP solver handles 215,000 records × 92 + constraints in ~40 seconds per area. +4. **Elastic slack:** If a target can't be hit exactly, slack + variables absorb the gap rather than making the problem infeasible. + +### Concrete example: Alabama CD-1 + +AL-01 has about 315,000 tax returns (0.17% of US returns) and +718,000 people (0.21% of US population). Its pop_share is 0.00215. + +Before optimization, every record gets `x[i] = 1`, meaning the +area looks exactly like 0.215% of the nation. But AL-01 has +relatively more retirees and fewer high-wage earners than the US +average. + +The optimizer adjusts: records with pension income get `x` slightly +above 1.0 (more weight), while records with very high wages get `x` +below 1.0 (less weight). For AL-01, the median multiplier is 0.93 +and the RMSE from 1.0 is 0.48 — most records stay close to their +starting weights. 
+ +The 92 targets constrain things like "AL-01 wages in the $100K-$200K +bin should be $4.35B" and "AL-01 total single returns should be +150,125." The optimizer finds the smallest x-adjustments that hit +all 92 targets within ±0.5%. + +## Shares: separating geography from levels + +Before diving into the pipeline architecture, it helps to understand +why we decompose targets into **shares** and **national sums**. + +A target like "wages in the $50K-$75K AGI bin for AL01" depends on +two things: +1. What fraction of national wages in that bin belong to AL01? + (geographic distribution — from SOI) +2. What are national wages in that bin? + (national level — from TMD) + +Item 1 comes from IRS SOI data and changes only with a new SOI +vintage (~annually). Item 2 comes from TMD and changes every time +imputations are updated. + + share = area_SOI_value / national_SOI_value + target = TMD_national_sum × share + +By pre-computing and saving shares, we avoid re-ingesting SOI data +on every TMD rebuild. The shares file is a stable artifact; only the +TMD national sums need recomputing when the microdata changes. 
+ +## Architecture + +``` +SOI data + crosswalks → shares (stable) ← rarely changes + ↓ +TMD data (cached_allvars) → national sums ← changes with TMD rebuilds + ↓ + shares × national sums = potential targets + ↓ + target spec → select from potential ← changes during recipe tuning + ↓ + per-area _targets.csv files + ↓ + QP solver (Clarabel) + ↓ + per-area _tmd_weights.csv.gz files +``` + +Three artifacts, three change frequencies: + +| Artifact | Example | Changes when | +|----------|---------|-------------| +| Shares file | `cds_shares.csv` | New SOI vintage (~annually) | +| Target spec | `cd_target_spec.csv` | Recipe tuning | +| Target files | `al01_targets.csv` | TMD rebuild or recipe change | + +## Variable Name Mapping + +The pipeline bridges three naming systems: + +| SOI raw | SOI base name | TMD/Tax-Calculator | Description | +|---------|---------------|-------------------|-------------| +| A00100 | 00100 | c00100 | AGI (computed) | +| A00200 | 00200 | e00200 | Wages (input) | +| A01700 | 01700 | e01500 *and* e01700 | Pensions (total vs taxable) | +| A02500 | 02500 | e02400 *and* c02500 | Social Security (total vs taxable) | +| N1 | n1 | c00100 (count=1) | Number of returns | +| MARS1 | mars1 | c00100 (count=1, fstatus=1) | Single returns | + +Key subtlety: multiple TMD variables can share the same SOI geographic +distribution. For example, both `e01500` (total pensions) and `e01700` +(taxable pensions) use SOI `A01700` for their geographic shares because +SOI only publishes the taxable component. The shares are the same — +only the TMD national sum differs. + +The mapping is defined in `ALL_SHARING_MAPPINGS` in `constants.py`. +Extended targets add more mappings in `EXTENDED_SHARING_MAPPINGS` +in `prepare_shares.py`. + +## The Target Spec + +The spec is a flat CSV where each row is one target — what you see is +what gets solved. 
+ +```csv +varname,count,scope,fstatus,agilo,agihi,description +XTOT,0,0,0,-9e+99,9e+99,Population amount all bins +c00100,0,1,0,-9e+99,1.0,AGI amount <$0K +c00100,0,1,0,1.0,10000.0,AGI amount $0K-$10K +... +eitc,0,1,0,-9e+99,9e+99,EITC amount all bins +``` + +Column meanings: +- **varname**: TMD variable name +- **count**: 0 = dollar amount, 1 = all returns, 2 = nonzero count +- **scope**: 0 = all records (XTOT only), 1 = PUF records +- **fstatus**: 0 = all, 1 = single, 2 = MFJ, 4 = HoH +- **agilo/agihi**: AGI bin boundaries (-9e99/9e99 = all bins) +- **description**: human-readable label (ignored by pipeline) + +To add a target: add a row. To remove one: delete the row. +No crossing, no exclude lists, no indirection. + +## Searching for Proxies + +Not every TMD variable has a direct SOI counterpart. When the +variable you want to target doesn't have SOI data at the right +geographic level, you need a proxy. + +**Strategy for finding proxies:** + +1. **Direct match:** TMD `e00200` (wages) → SOI `A00200` (wages). + Best case. + +2. **Related variable:** TMD `e01500` (total pensions) → SOI `A01700` + (taxable pensions). The taxable component has a similar geographic + distribution to total pensions. + +3. **Census data:** SALT deductions → Census state/local finance data + provides property tax and sales tax collections by state. Better + geographic distribution than SOI for capped deductions. Only + available at the state level, not CD or county. + +4. **State-average approximation:** When proxy data exists at a coarser + level (e.g., state but not CD), use the coarse share and distribute + within the state by SOI proportions. Example: CD SALT targets use + SOI CD columns (a18425, a18500) as a proxy for Census state data. + +5. **Aggregate (no AGI breakdown):** For variables where per-bin + geographic variation is unreliable, use a single all-bins target. + Examples: EITC, CTC. 
+ +## Establishing Base Targets and Expanding Incrementally + +Start conservative, expand as feasibility allows. + +### Base targets (high confidence) +- Income amounts by AGI bin: AGI, wages, interest, pensions, SS, SALT, partnership +- Return counts in upper-income bins ($25K+) +- Filing-status totals (single, MFJ, HoH — one all-bins target each) +- Population (XTOT) + +### First extension: total-only (one target per variable, no bins) +- Additional income types: dividends, business income, capital gains +- Deductions: mortgage interest, charitable +- SALT components: income/sales, real estate +- Credits: EITC, CTC (amount + nonzero count) + +Total-only targets are almost risk-free — they add one constraint +each and the solver has full freedom to distribute across bins. + +### Second extension: per-bin for selected variables and stubs +- Capital gains in upper stubs ($100K+) — rich people have them +- Use developer mode difficulty table to assess feasibility + +### What NOT to target +- Variables with very thin cells (few records in a bin) +- Variables where the SOI and TMD definitions diverge significantly +- Negative-AGI bins for variables concentrated among retirees (e02400) +- Per-bin credit targets (EITC, CTC) — see targeting rules below + +## Targeting Rules of Thumb + +Lessons learned from CD pipeline development (436 areas). + +### 1. Target difficulty = gap from proportionate share + +Use `python -m tmd.areas.developer_tools --difficulty AL01` to see how +far each target is from what the area would get under population- +proportionate allocation. This is the single most useful diagnostic. + +- **Easy (<5% gap):** Solver barely moves weights. Free to add. +- **Moderate (5–20%):** Some weight distortion. Generally fine. +- **Hard (20–50%):** Significant weight movement. Worth targeting + if the variable is policy-relevant, but watch for interactions. +- **Very hard (>50%):** Extreme weight distortion. May destabilize + other targets. 
Consider total-only instead of per-bin. + +Example: Alabama CD-1 mean |gap| = 23%, manageable. Manhattan +(NY-12) mean |gap| = 340% — an extreme outlier requiring many +dropped targets or raised tolerances. + +### 2. Solve time scales super-linearly with target count + +Clarabel QP solver time scales worse than O(n²) in the number of +targets. Benchmarks on a single CD (AL-01): + +| Targets | All-bin rows | Solve time | Notes | +|---------|-------------|-----------|-------| +| 78 | 5 | 7s | Base recipe | +| 92 | 19 | 12s | +14 total-only extended | +| 95 | 19 | 14s | +3 capgains upper bins | +| 107 | 19 | 92s | +12 credit per-bin (problematic) | + +The cost of additional targets depends on what they are, not just +how many. + +### 3. Dense constraint rows are expensive + +An "all-bin" target (agilo=-inf, agihi=+inf) touches every PUF +record in the B matrix (~97% of records). A per-bin target touches +only records in that AGI bin (~5–30%). Dense rows make the solver's +matrix factorization harder. + +However, total-only targets are still worthwhile — they add modest +cost (7s→12s for 14 total-only targets) and constrain aggregate +quantities that would otherwise drift. + +### 4. Per-bin credit targets are extremely difficult + +EITC and CTC have sharp eligibility cliffs. Per-bin targets require +the solver to match both income distribution AND credit distribution +within each bin simultaneously. For AL-01: +- EITC $10K–$25K: +86% gap (needs heavy upweighting) +- CTC $10K–$25K: -89% gap (needs heavy downweighting) +- These pull weights in opposite directions → solver struggles + +**Recommendation:** Target credits as all-bin totals only. The +totals constrain aggregate credit amounts without forcing per-bin +precision that the microdata can't deliver. + +### 5. Capital gains per-bin targets are feasible (for upper stubs) + +Capital gains are concentrated in high-income bins where there are +plenty of records. 
Adding 3 per-bin targets ($100K+) costs only +1.5s and all hit. The gap is moderate (~50% for $500K+). + +### 6. Conflicting targets cause solver explosions + +When two targets require opposite weight adjustments for overlapping +record sets, the solver thrashes. Signs of conflict: +- Solve time jumps disproportionately (12s → 92s for 15 targets) +- Many violated targets in the solution +- High RMSE (weight multipliers far from 1.0) + +Use the difficulty table to spot targets with large gaps in opposite +directions. If variable A needs +60% and variable B needs -60% in +the same AGI bin, one should be dropped or made total-only. + +### 7. Start total-only, then add bins selectively + +The incremental approach that works: +1. Total-only targets for all variables (low risk, fast) +2. Per-bin targets for variables where geography matters most and + the difficulty table shows moderate gaps +3. Use developer mode to test each addition on a few representative + areas before committing to a full batch run + +### 8. Area-specific overrides are normal + +Not every area can hit every target. The override YAML file records +per-area adjustments. For 436 CDs with 95 targets: +- ~80% solve with default params +- ~17% need 1–8 targets dropped +- ~3% need raised tolerance or multiplier cap + +This is acceptable. The alternative — a recipe so conservative that +every area solves — would sacrifice accuracy for the 80% of areas +that can handle more targets. + +## Developer Workflow: Expanding a Recipe + +Step-by-step process for adding new targets to a recipe. + +### Step 1: Identify high-value targets + +Decide which variables matter for your use case. Rank by policy +importance. Income variables (wages, AGI, capital gains) are +usually more important than deduction details. Credits matter +for distributional analysis. 
+ +### Step 2: Run difficulty tables on representative areas + +Pick 3–4 areas spanning the difficulty spectrum: +- A typical/easy area (e.g., AL-01, MN-03) +- A hard area (e.g., NY-12, TX-20) +- An area similar to your analysis focus + +```bash +python -m tmd.areas.developer_tools --difficulty AL01 +python -m tmd.areas.developer_tools --difficulty NY12 +``` + +For each proposed target, check the gap%. If most areas show +<30% gap, the target is likely feasible. If the hard areas +show >100% gap, consider total-only instead of per-bin. + +### Step 3: Test on a single easy area + +Add the new targets to the spec and solve one easy area: + +```bash +python -m tmd.areas.prepare_targets --scope AL01 +python -m tmd.areas.solve_weights --scope AL01 +``` + +Check solve time, violations, and RMSE. If solve time jumps +disproportionately (e.g., 12s → 90s for 15 new targets), +the new targets have constraint interactions. Try adding +them one at a time to find the culprit. + +### Step 4: Test on a hard area + +Repeat on NY-12 or another difficult CD. If it fails, the +auto-relaxation cascade can find which targets to drop: + +```bash +python -m tmd.areas.developer_tools --scope NY12 --verbose +``` + +### Step 5: Run dual analysis on problem areas + +If a target causes unexpected solver difficulty despite a moderate +gap%, check the shadow prices: + +```bash +python -m tmd.areas.developer_tools --dual AL01 +``` + +High duals identify constraints that conflict with each other. +A target may look easy (10% gap) but have a massive dual because +it pulls against another target in the same record set. + +### Step 6: Full batch run + quality report + +Once satisfied with representative areas, run the full batch: + +```bash +python -m tmd.areas.developer_tools --scope cds --workers 16 +python -m tmd.areas.solve_weights --scope cds --workers 16 +python -m tmd.areas.quality_report --scope cds --output +``` + +Compare the quality report against the previous version. 
+Check bystander distortion for the newly targeted variables. + +### Step 7: Iterate + +If too many areas need overrides, the recipe is too aggressive. +If bystanders show high distortion, more targeting is needed. +The goal is a recipe where ~80% of areas solve cleanly and the +remainder need minor per-area adjustments. + +## Developer Mode + +A toolkit of diagnostics and automated relaxation for area weights. + +### When to run +- After changing the target spec (adding/removing targets) +- After a new SOI data vintage (shares changed) +- After significant TMD data changes + +### How it works + +For each area, developer mode tries a relaxation cascade: + +1. **Level 0:** Solve with default parameters +2. **Level 1:** Drop unreachable targets (automatic) +3. **Level 2:** Reduce slack penalties on problematic constraints +4. **Level 3:** Drop specific targets identified by LP feasibility +5. **Level 4:** Raise multiplier cap (50x → 100x) +6. **Level 5:** Raise constraint tolerance (0.5% → 1.0%) + +Most areas solve at level 0. A handful of extreme areas (e.g., NY-12 +/ Manhattan with its extreme high-income profile) need level 3. 
+ +### Usage + +```bash +# LP feasibility check only (fast diagnostic): +python -m tmd.areas.developer_tools --scope cds --lp-only --workers 16 + +# Full relaxation cascade: +python -m tmd.areas.developer_tools --scope cds --workers 16 + +# Debug a single area: +python -m tmd.areas.developer_tools --scope NY12 --verbose +``` + +### Output +- **Override YAML:** `prepare/recipes/cd_solver_overrides.yaml` — + committed to repo, read by production solver +- **Developer report:** `weights/cds/developer_report.txt` — + per-area relaxation details + +### Override file format + +```yaml +_defaults: + multiplier_max: 50 + constraint_tol: 0.005 + +ny12: + drop_targets: + - "c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0" + - "e26270/cnt=0/scope=1/agi=[100000.0,200000.0)/fs=0" +``` + +The production solver reads this file and applies per-area +customizations automatically. No manual tuning needed. + +## Updating for a New Year + +### New SOI data vintage + +1. **Get the data:** Download SOI CSV files for the new year. + Place in `prepare/data/soi_states/` or `prepare/data/soi_cds/`. + +2. **Update constants:** Add the new year's CSV filename to + `SOI_STATE_CSV_PATTERNS` or `SOI_CD_CSV_PATTERNS` in `constants.py`. + Check if AGI stubs changed (rare but possible). + +3. **Recompute shares:** + ```bash + python -m tmd.areas.prepare_shares --scope states --year 2023 + python -m tmd.areas.prepare_shares --scope cds --year 2023 + ``` + +4. **Regenerate targets:** + ```bash + python -m tmd.areas.prepare_targets --scope cds + ``` + +5. **Run developer mode:** + ```bash + python -m tmd.areas.developer_tools --scope cds --workers 16 + ``` + +6. **Solve weights:** + ```bash + python -m tmd.areas.solve_weights --scope cds --workers 16 + ``` + +7. **Quality check:** + ```bash + python -m tmd.areas.quality_report --scope cds --output + ``` + +### New TMD rebuild (same SOI year) + +Only steps 4-7 needed — shares don't change. + +### Adding a new target variable + +1. 
Check if SOI has the variable (or a proxy) at the right + geographic level. + +2. Add the mapping to `EXTENDED_SHARING_MAPPINGS` in `prepare_shares.py`. + +3. Recompute shares: `python -m tmd.areas.prepare_shares --scope cds` + +4. Add a row to the target spec CSV. + +5. Regenerate targets and run developer mode to check feasibility. + +### Adding a new area type (e.g., counties) + +1. Write SOI data ingestion module (like `soi_cd_data.py`). + +2. Define AGI cuts and area type in `constants.py`. + +3. Create a target spec and shares file. + +4. Add scope handling to `solve_weights.py` and `quality_report.py`. + +5. Start with a very conservative recipe — counties as small as 40 + returns will need far fewer targets than CDs. Use tiered recipes + by county size. + +## File Locations + +``` +tmd/areas/ +├── prepare/ +│ ├── constants.py # AGI cuts, ALL_SHARING_MAPPINGS, AreaType +│ ├── recipes/ +│ │ ├── cd_target_spec.csv # CD recipe (92 targets) +│ │ ├── state_target_spec.csv # State recipe (179 targets) +│ │ ├── cd_solver_overrides.yaml # Per-area solver params +│ │ ├── cds.json # [legacy] JSON recipe +│ │ └── states.json # [legacy] JSON recipe +│ ├── data/ +│ │ ├── soi_states/ # Raw SOI state CSVs +│ │ ├── soi_cds/ # Raw SOI CD CSVs +│ │ ├── cds_shares.csv # Pre-computed CD shares +│ │ └── states_shares.csv # Pre-computed state shares +│ ├── target_sharing.py # Share computation, TMD national sums +│ ├── target_file_writer.py # [legacy] Recipe-based target writing +│ ├── soi_state_data.py # State SOI data ingestion +│ ├── soi_cd_data.py # CD SOI data + crosswalk +│ └── extended_targets.py # State extended targets (Census, credits) +├── prepare_targets.py # Target file generation (spec-based) +├── prepare_shares.py # Share pre-computation +├── developer_tools.py # Auto-relaxation cascade +├── solve_weights.py # QP batch solver +├── create_area_weights.py # QP solver core (Clarabel) +├── quality_report.py # Cross-area quality diagnostics +├── solver_overrides.py # 
Per-area override management +├── targets/ +│ ├── states/ # Per-state target CSVs +│ └── cds/ # Per-CD target CSVs +└── weights/ + ├── states/ # Per-state weight files + logs + └── cds/ # Per-CD weight files + logs +``` + +## Quality Report + +The quality report (`python -m tmd.areas.quality_report --scope cds`) +provides: + +- **Target accuracy:** Per-area hit rates, violation details +- **Weight distortion:** Multiplier distribution (how far weights moved from population-proportional) +- **Weight distribution by AGI stub:** National vs sum-of-areas returns and AGI per bin +- **Weight exhaustion:** Whether records are over/under-used across areas +- **Cross-area aggregation:** Sum-of-areas vs national for key variables +- **Bystander analysis:** Distortion of untargeted variables (both aggregate and per-bin) + +Use `--output` to auto-save to file. For CDs/counties, only the top +20 most-distorted areas are shown in the per-area table. diff --git a/tmd/areas/Makefile b/tmd/areas/Makefile new file mode 100644 index 00000000..65ab1aba --- /dev/null +++ b/tmd/areas/Makefile @@ -0,0 +1,49 @@ +# Area weighting pipeline: states and congressional districts. +# +# Prerequisites: run `make data` from the repo root first. +# +# Usage from repo root: +# make -C tmd/areas states +# make -C tmd/areas cds WORKERS=16 +# make -C tmd/areas state-targets (shares + targets only) +# make -C tmd/areas cd-targets (shares + targets only) +# +# Override worker count: make -C tmd/areas cds WORKERS=16 + +WORKERS ?= 8 +ROOT := $(realpath $(CURDIR)/../..) 
+ +# --- State pipeline --- + +.PHONY: state-shares state-targets state-weights states + +state-shares: + python -m tmd.areas.prepare_shares --scope states + +state-targets: state-shares + python -m tmd.areas.prepare_targets --scope states + +state-weights: state-targets + python -m tmd.areas.solve_weights --scope states --workers $(WORKERS) + +states: state-weights + python -m pytest $(ROOT)/tests/test_prepare_targets.py -v -k "not CD" + python -m pytest $(ROOT)/tests/test_state_weight_results.py -v + python -m tmd.areas.quality_report --scope states + +# --- Congressional district pipeline --- + +.PHONY: cd-shares cd-targets cd-weights cds + +cd-shares: + python -m tmd.areas.prepare_shares --scope cds + +cd-targets: cd-shares + python -m tmd.areas.prepare_targets --scope cds + +cd-weights: cd-targets + python -m tmd.areas.solve_weights --scope cds --workers $(WORKERS) + +cds: cd-weights + python -m pytest $(ROOT)/tests/test_prepare_targets.py -v -k CD + python -m tmd.areas.quality_report --scope cds diff --git a/tmd/areas/developer_tools.py b/tmd/areas/developer_tools.py new file mode 100644 index 00000000..7eac0201 --- /dev/null +++ b/tmd/areas/developer_tools.py @@ -0,0 +1,956 @@ +# pylint: disable=import-outside-toplevel,inconsistent-quotes +""" +Developer mode — iterative LP/QP auto-relaxation for area weights. + +Runs an automated relaxation cascade on each area to find the +least-invasive parameter adjustments that make the solver succeed. +Produces a per-area override YAML file committed to the repo. 
+ +Relaxation cascade (least invasive first): + Level 0: Full spec, default params + Level 1: Drop unreachable targets (automatic in solver) + Level 2: Reduce slack penalty on problematic targets + Level 3: Drop specific targets identified by LP feasibility + Level 4: Raise multiplier cap + Level 5: Raise constraint tolerance + +Usage: + python -m tmd.areas.developer_tools --scope cds --workers 16 + python -m tmd.areas.developer_tools --scope cds --lp-only + python -m tmd.areas.developer_tools --scope NY12 --verbose +""" + +import argparse +import io +import sys +import time +from concurrent.futures import ProcessPoolExecutor, as_completed +from pathlib import Path + +import numpy as np +import pandas as pd + +from tmd.areas.create_area_weights import ( + AREA_CONSTRAINT_TOL, + AREA_MULTIPLIER_MIN, + AREA_SLACK_PENALTY, + CD_MULTIPLIER_MAX, + CD_TARGET_DIR, + CD_WEIGHT_DIR, + STATE_TARGET_DIR, + STATE_WEIGHT_DIR, + _assign_slack_penalties, + _build_constraint_matrix, + _check_feasibility, + _drop_impossible_targets, + _load_taxcalc_data, + _solve_area_qp, +) +from tmd.areas.solver_overrides import write_overrides + +# --- Configuration --- + +_RECIPES = Path(__file__).parent / "prepare" / "recipes" +_CD_OVERRIDES = _RECIPES / "cd_solver_overrides.yaml" +_STATE_OVERRIDES = _RECIPES / "state_solver_overrides.yaml" + +# Max targets to drop per area before giving up +_MAX_DROPS = 10 + +# Relaxation cascade parameters +_LEVEL_LABELS = { + 0: "default", + 1: "auto-drop unreachable", + 2: "reduced slack", + 3: "drop targets", + 4: "raise multiplier cap", + 5: "raise tolerance", +} + + +def _run_lp_feasibility( + area, + vdf, + target_dir, + multiplier_max, + constraint_tol=AREA_CONSTRAINT_TOL, +): + """ + Run LP feasibility check for one area. 
def _solve_with_params(
    area,
    vdf,
    target_dir,
    multiplier_max,
    constraint_tol=AREA_CONSTRAINT_TOL,
    slack_penalty=AREA_SLACK_PENALTY,
    drop_labels=None,
):
    """
    Solve one area with given parameters.

    Builds the constraint matrix for ``area``, lets
    ``_drop_impossible_targets`` prune it, optionally removes the targets
    matched by ``drop_labels``, runs the QP solver, and measures the
    relative error of every remaining target.

    Parameters
    ----------
    area : str
        Area code passed through to ``_build_constraint_matrix``.
    vdf : DataFrame
        Record-level data passed through to ``_build_constraint_matrix``.
    target_dir : Path
        Directory holding the area target files.
    multiplier_max : float
        Upper bound on the per-record weight multiplier.
    constraint_tol : float
        Relative tolerance used by the solver and by the violation check.
    slack_penalty : float
        Default penalty handed to ``_assign_slack_penalties``.
    drop_labels : list or None
        Patterns tested with ``should_drop_target``; matching targets are
        removed before solving.

    Returns
    -------
    dict
        Keys: ``area``, ``status``, ``n_targets``, ``n_violated``,
        ``max_viol``, ``rmse`` (root-mean-square deviation of the
        multipliers from 1.0), ``violated_labels`` (list of
        ``(label, rel_error)`` sorted worst first) and ``log``.
    """
    out = io.StringIO()
    B_csc, targets, labels, _pop_share = _build_constraint_matrix(
        area, vdf, out, target_dir=target_dir
    )
    B_csc, targets, labels = _drop_impossible_targets(
        B_csc,
        targets,
        labels,
        out,
        multiplier_max=multiplier_max,
        constraint_tol=constraint_tol,
    )

    # Apply explicit drops
    if drop_labels:
        from tmd.areas.solver_overrides import should_drop_target

        keep_arr = np.array(
            [not should_drop_target(lbl, drop_labels) for lbl in labels],
            dtype=bool,
        )
        if not keep_arr.all():
            # Select the surviving rows while staying sparse: converting
            # the full (targets x records) matrix to dense just to slice
            # rows wastes memory and time on every cascade iteration.
            B_csc = B_csc.tocsr()[np.flatnonzero(keep_arr), :].tocsc()
            targets = targets[keep_arr]
            labels = [lbl for lbl, k in zip(labels, keep_arr) if k]

    n_records = B_csc.shape[1]

    # Assign slack penalties
    per_constraint_penalties = _assign_slack_penalties(
        labels, default_penalty=slack_penalty
    )

    # Solve QP
    x_opt, _s_lo, _s_hi, info = _solve_area_qp(
        B_csc,
        targets,
        labels,
        n_records,
        constraint_tol=constraint_tol,
        slack_penalties=per_constraint_penalties,
        multiplier_min=AREA_MULTIPLIER_MIN,
        multiplier_max=multiplier_max,
        out=out,
    )

    # Compute violations; the denominator is floored at 1.0 so that
    # near-zero targets do not blow up the relative error.
    achieved = np.asarray(B_csc @ x_opt).ravel()
    rel_errors = np.abs(achieved - targets) / np.maximum(np.abs(targets), 1.0)
    eps = 1e-9
    viol_mask = rel_errors > constraint_tol + eps
    n_violated = int(viol_mask.sum())
    # Always a float, including the "no targets left" case.
    max_viol = float(rel_errors.max()) if len(rel_errors) > 0 else 0.0

    # Identify violated labels, worst first
    violated_labels = sorted(
        (
            (labels[idx], float(rel_errors[idx]))
            for idx in np.flatnonzero(viol_mask)
        ),
        key=lambda item: -item[1],
    )

    rmse = float(np.sqrt(np.mean((x_opt - 1.0) ** 2)))

    return {
        "area": area,
        "status": info["status"],
        "n_targets": len(targets),
        "n_violated": n_violated,
        "max_viol": max_viol,
        "rmse": rmse,
        "violated_labels": violated_labels,
        "log": out.getvalue(),
    }
def _relaxation_cascade(
    area,
    vdf,
    target_dir,
    multiplier_max,
    constraint_tol=AREA_CONSTRAINT_TOL,
    verbose=False,
):
    """
    Run relaxation cascade for one area.

    Tries increasingly aggressive relaxations until the area
    solves with acceptable violations:

    * Level 0 - solve with the given parameters.
    * Level 3 - drop targets one at a time (at most ``_MAX_DROPS``),
      taking candidates first from the LP feasibility check and then
      from the targets the level-0 QP violated.
    * Level 4 - double the multiplier cap (capped at 200), keeping drops.
    * Level 5 - additionally double the constraint tolerance.

    A step counts as successful when the solve reports zero violated
    targets and its status string contains ``"Solved"``.

    Parameters
    ----------
    area : str
        Area code.
    vdf : DataFrame
        Record-level data handed to the solver helpers.
    target_dir : Path
        Directory holding the area target files.
    multiplier_max : float
        Starting upper bound on the weight multiplier.
    constraint_tol : float
        Starting relative constraint tolerance.
    verbose : bool
        Print one progress line per attempted level.

    Returns
    -------
    tuple
        ``(level, overrides_dict, solve_result)``. Level 5 is returned
        whether or not the final solve succeeded; inspect
        ``solve_result`` to tell the two cases apart.
    """

    def _succeeded(res):
        # Single definition of "acceptable", shared by every level.
        return res["n_violated"] == 0 and "Solved" in res["status"]

    # Level 0: default params
    result = _solve_with_params(
        area,
        vdf,
        target_dir,
        multiplier_max=multiplier_max,
        constraint_tol=constraint_tol,
    )

    if _succeeded(result):
        return 0, {}, result

    if verbose:
        print(
            f" {area}: Level 0 — {result['n_violated']} violations,"
            f" status={result['status']}"
        )

    # LP feasibility check to identify problematic constraints, which
    # are then dropped one at a time (level 3).
    lp_info = _run_lp_feasibility(
        area, vdf, target_dir, multiplier_max, constraint_tol
    )

    drop_labels = []
    if not lp_info["feasible"] or result["n_violated"] > 0:
        # Worst constraints from LP come first. Build a fresh list so the
        # LP result dict is not mutated.
        candidates = [entry[0] for entry in lp_info.get("worst_labels", [])]
        # Then add labels the QP violated that the LP did not report.
        seen = set(candidates)
        for lbl, _err in result.get("violated_labels", []):
            if lbl not in seen:
                seen.add(lbl)
                candidates.append(lbl)

        # Level 3: Drop targets iteratively
        for lbl in candidates[:_MAX_DROPS]:
            drop_labels.append(lbl)
            result = _solve_with_params(
                area,
                vdf,
                target_dir,
                multiplier_max=multiplier_max,
                constraint_tol=constraint_tol,
                drop_labels=drop_labels,
            )
            if verbose:
                print(
                    f" {area}: Level 3 — dropped {len(drop_labels)},"
                    f" {result['n_violated']} violations"
                )
            if _succeeded(result):
                overrides = {"drop_targets": list(drop_labels)}
                return 3, overrides, result

    # Level 4: Raise multiplier cap
    higher_cap = min(multiplier_max * 2, 200)
    result = _solve_with_params(
        area,
        vdf,
        target_dir,
        multiplier_max=higher_cap,
        constraint_tol=constraint_tol,
        drop_labels=drop_labels,
    )
    if _succeeded(result):
        overrides = {"multiplier_max": higher_cap}
        if drop_labels:
            overrides["drop_targets"] = list(drop_labels)
        return 4, overrides, result
    if verbose:
        print(
            f" {area}: Level 4 — cap={higher_cap},"
            f" {result['n_violated']} violations"
        )

    # Level 5: Raise tolerance
    higher_tol = constraint_tol * 2
    result = _solve_with_params(
        area,
        vdf,
        target_dir,
        multiplier_max=higher_cap,
        constraint_tol=higher_tol,
        drop_labels=drop_labels,
    )
    overrides = {
        "multiplier_max": higher_cap,
        "constraint_tol": higher_tol,
    }
    if drop_labels:
        overrides["drop_targets"] = list(drop_labels)

    # Last rung of the cascade: level is 5 regardless of the outcome.
    level = 5
    if verbose:
        status = "OK" if _succeeded(result) else "STILL FAILING"
        print(
            f" {area}: Level 5 — tol={higher_tol},"
            f" {result['n_violated']} violations ({status})"
        )

    return level, overrides, result
def run_developer_tools(
    scope="cds",
    num_workers=1,
    lp_only=False,
    verbose=False,
):
    """
    Run developer mode: iterative relaxation for all areas.

    Resolves ``scope`` to a set of areas, runs either the LP feasibility
    check or the full relaxation cascade on each area in a process pool,
    prints a summary and (cascade mode only) writes the override YAML and
    a text report.

    Parameters
    ----------
    scope : str
        'cds' (alias 'cd', 'all'), 'states' (alias 'state'), or
        comma-separated area codes. For explicit codes, the first code
        decides the family: longer than two characters means
        congressional districts, otherwise states.
    num_workers : int
        Number of parallel workers.
    lp_only : bool
        If True, only run LP feasibility (no QP solve).
    verbose : bool
        Print per-area progress.

    Returns
    -------
    None
    """
    scope_lower = scope.lower().strip()
    first_code = scope.split(",")[0].strip()

    # Resolve keywords explicitly. Deciding purely on len(first_code) > 2
    # sent "states" (six characters) down the CD branch.
    if scope_lower in ("cds", "cd", "all"):
        is_cd = True
        filter_codes = False
    elif scope_lower in ("states", "state"):
        is_cd = False
        filter_codes = False
    else:
        is_cd = len(first_code) > 2
        filter_codes = True

    if is_cd:
        target_dir = CD_TARGET_DIR
        weight_dir = CD_WEIGHT_DIR
        override_path = _CD_OVERRIDES
        multiplier_max = CD_MULTIPLIER_MAX
    else:
        target_dir = STATE_TARGET_DIR
        weight_dir = STATE_WEIGHT_DIR
        override_path = _STATE_OVERRIDES
        multiplier_max = 25.0

    constraint_tol = AREA_CONSTRAINT_TOL

    # List areas from target files
    areas = sorted(
        p.name.split("_")[0] for p in target_dir.glob("*_targets.csv")
    )
    if filter_codes:
        codes = {c.strip().lower() for c in scope.split(",") if c.strip()}
        areas = [a for a in areas if a in codes]

    n_areas = len(areas)
    if n_areas == 0:
        # Nothing to solve; bail out before the override file is rewritten
        # with no per-area entries.
        print(f"Developer mode: no areas matched '{scope}' in {target_dir}")
        return

    print(f"Developer mode: {n_areas} areas, {num_workers} workers")
    print(f"Target dir: {target_dir}")
    t_start = time.time()

    if lp_only:
        # LP feasibility only
        print("Running LP feasibility checks...")
        results = []
        with ProcessPoolExecutor(
            max_workers=num_workers,
            initializer=_init_worker,
            initargs=(
                str(target_dir),
                multiplier_max,
                constraint_tol,
                False,
            ),
        ) as executor:
            futures = {executor.submit(_lp_only_area, a): a for a in areas}
            for future in as_completed(futures):
                results.append(future.result())

        feasible = sum(1 for r in results if r["feasible"])
        infeasible = [r for r in results if not r["feasible"]]
        elapsed = time.time() - t_start

        print(
            f"\nLP Feasibility: {feasible}/{n_areas} feasible"
            f" ({elapsed:.1f}s)"
        )
        if infeasible:
            print(f"\nInfeasible areas ({len(infeasible)}):")
            for r in sorted(infeasible, key=lambda x: x["area"]):
                worst = r["worst_labels"][:3]
                worst_str = ", ".join(
                    f"{lbl} ({rel:.1%})" for lbl, _, rel in worst
                )
                print(f" {r['area']}: {worst_str}")
        return

    # Full relaxation cascade
    print("Running relaxation cascade...")
    area_results = []
    area_overrides = {}
    level_counts = {i: 0 for i in range(6)}

    with ProcessPoolExecutor(
        max_workers=num_workers,
        initializer=_init_worker,
        initargs=(
            str(target_dir),
            multiplier_max,
            constraint_tol,
            verbose,
        ),
    ) as executor:
        futures = {executor.submit(_process_area, a): a for a in areas}
        completed = 0
        for future in as_completed(futures):
            area, level, overrides, stats = future.result()
            completed += 1
            area_results.append((area, level, overrides, stats))
            level_counts[level] = level_counts.get(level, 0) + 1
            if overrides:
                area_overrides[area] = overrides

            # Progress
            if completed % 10 == 0 or completed == n_areas:
                elapsed = time.time() - t_start
                sys.stdout.write(
                    f"\r {completed}/{n_areas}" f" [{elapsed:.0f}s]"
                )
                sys.stdout.flush()

    sys.stdout.write("\n")
    elapsed = time.time() - t_start

    # Write override file
    # NOTE(review): only the areas processed in this run are passed to
    # write_overrides; if it replaces the file wholesale, a partial-scope
    # run would discard entries for other areas — confirm.
    defaults = {
        "multiplier_max": multiplier_max,
        "constraint_tol": constraint_tol,
    }
    write_overrides(override_path, defaults, area_overrides)

    # Summary
    print(f"\nCompleted in {elapsed:.0f}s")
    print(f"Override file: {override_path}")
    print("\nRelaxation levels:")
    for level in sorted(level_counts.keys()):
        cnt = level_counts[level]
        if cnt > 0:
            label = _LEVEL_LABELS.get(level, f"level {level}")
            print(f" Level {level} ({label}): {cnt} areas")

    n_overrides = len(area_overrides)
    if n_overrides > 0:
        print(f"\n{n_overrides} areas need overrides:")
        for area, level, overrides, stats in sorted(
            area_results, key=lambda x: -x[1]
        ):
            if not overrides:
                continue
            drops = len(overrides.get("drop_targets", []))
            extra = []
            if drops:
                extra.append(f"{drops} dropped")
            if "multiplier_max" in overrides:
                extra.append(f"cap={overrides['multiplier_max']}")
            if "constraint_tol" in overrides:
                extra.append(f"tol={overrides['constraint_tol']}")
            detail = ", ".join(extra)
            print(
                f" {area}: level {level},"
                f" {stats['n_violated']} violations,"
                f" RMSE={stats['rmse']:.3f}"
                f" ({detail})"
            )

    # Write developer report
    report_path = weight_dir / "developer_report.txt"
    _write_report(
        report_path,
        area_results,
        level_counts,
        n_areas,
        elapsed,
        override_path,
    )
    print(f"Report: {report_path}")
def target_difficulty(area, target_dir=None):
    """
    Report how far each of an area's targets is from "proportionate".

    The proportionate value of a target is the area's population share
    (its XTOT target divided by the weighted national XTOT) times the
    national weighted total for the same variable / AGI bin / filing
    status. The gap between that and the actual target is a rough
    measure of how much the solver must distort weights to hit it.

    Parameters
    ----------
    area : str
        Area code; lower-cased to locate ``<area>_targets.csv``.
    target_dir : Path or None
        Directory with target files; defaults to ``CD_TARGET_DIR``.

    Returns
    -------
    DataFrame
        One row per target, sorted by ``abs_gap_pct`` descending. A
        formatted report is printed as a side effect.
    """
    target_dir = CD_TARGET_DIR if target_dir is None else target_dir

    tmd = _load_taxcalc_data()
    weights = tmd["s006"].values
    n_rec = len(tmd)

    spec = pd.read_csv(target_dir / f"{area.lower()}_targets.csv")

    # Population share comes from the XTOT row
    xtot = spec[spec["varname"] == "XTOT"].iloc[0]
    national_pop = float((weights * tmd["XTOT"].values).sum())
    pop_share = xtot["target"] / national_pop

    def _record_mask(row):
        # 0/1 indicator of the records a target applies to
        # (AGI bin, optional data-source restriction, optional MARS).
        chosen = (tmd["c00100"] >= row["agilo"]) & (
            tmd["c00100"] < row["agihi"]
        )
        if row["scope"] == 1:
            chosen = chosen & (tmd["data_source"] == 1)
        if row["fstatus"] > 0:
            chosen = chosen & (tmd["MARS"] == row["fstatus"])
        return chosen.values.astype(float)

    def _record_values(row):
        # count 0: dollar amount; count 2: nonzero indicator;
        # anything else (including 1): one per record.
        if row["count"] == 0:
            return tmd[row["varname"]].astype(float).values
        if row["count"] == 2:
            return (tmd[row["varname"]] != 0).astype(float).values
        return np.ones(n_rec, dtype=float)

    records = []
    for _, row in spec.iterrows():
        if row["varname"] == "XTOT":
            # XTOT defines pop_share, so its gap is zero by construction.
            records.append(
                {
                    "varname": "XTOT",
                    "count": 0,
                    "fstatus": 0,
                    "agilo": row["agilo"],
                    "agihi": row["agihi"],
                    "national": national_pop,
                    "proportionate": xtot["target"],
                    "target": row["target"],
                    "gap": 0,
                    "gap_pct": 0,
                }
            )
            continue

        national = float(
            (weights * _record_mask(row) * _record_values(row)).sum()
        )
        proportionate = pop_share * national
        actual = row["target"]
        if abs(proportionate) > 1:
            pct = (actual / proportionate - 1) * 100
        else:
            pct = 0

        records.append(
            {
                "varname": row["varname"],
                "count": int(row["count"]),
                "fstatus": int(row["fstatus"]),
                "agilo": row["agilo"],
                "agihi": row["agihi"],
                "national": national,
                "proportionate": proportionate,
                "target": actual,
                "gap": actual - proportionate,
                "gap_pct": pct,
            }
        )

    rdf = pd.DataFrame(records)
    rdf["abs_gap_pct"] = rdf["gap_pct"].abs()
    rdf = rdf.sort_values("abs_gap_pct", ascending=False)

    # ---- formatted report ----
    cnt_labels = {0: "amt", 1: "returns", 2: "nz-count"}
    fs_labels = {0: "", 1: " single", 2: " MFJ", 4: " HoH"}

    def _fmt_agi(lo, hi):
        open_low = lo < -1e10
        open_high = hi > 1e10
        if open_low and open_high:
            return "all"
        if open_low:
            return f"<${hi / 1000:.0f}K"
        if open_high:
            return f"${lo / 1000:.0f}K+"
        return f"${lo / 1000:.0f}K-${hi / 1000:.0f}K"

    print(f"\nTARGET DIFFICULTY: {area.upper()} (pop_share={pop_share:.6f})")
    print("Proportionate = what area would get if it looked like the nation")
    print(
        f"\n{'Target':<45} {'Proportionate':>14}"
        f" {'Target':>14} {'Gap%':>8}"
    )
    print("-" * 83)

    for _, r in rdf.iterrows():
        kind = cnt_labels.get(r["count"], f"c{r['count']}")
        status = fs_labels.get(r["fstatus"], "")
        agi = _fmt_agi(r["agilo"], r["agihi"])
        label = f"{r['varname']} {kind}{status} {agi}"

        if r["count"] == 0:
            # Dollar amounts are shown in millions
            line = (
                f"{label:<45}"
                f" ${r['proportionate'] / 1e6:>11.1f}M"
                f" ${r['target'] / 1e6:>11.1f}M"
                f" {r['gap_pct']:>+7.1f}%"
            )
        else:
            line = (
                f"{label:<45}"
                f" {r['proportionate']:>13,.0f}"
                f" {r['target']:>13,.0f}"
                f" {r['gap_pct']:>+7.1f}%"
            )
        print(line)

    absgap = rdf["abs_gap_pct"]
    n_easy = (absgap < 5).sum()
    n_mod = ((absgap >= 5) & (absgap < 20)).sum()
    n_hard = ((absgap >= 20) & (absgap < 50)).sum()
    n_vhard = (absgap >= 50).sum()
    print(
        f"\nDifficulty: {n_easy} easy (<5%),"
        f" {n_mod} moderate (5-20%),"
        f" {n_hard} hard (20-50%),"
        f" {n_vhard} very hard (>50%)"
    )
    print(
        f"Mean |gap%|: {rdf['abs_gap_pct'].mean():.1f}%,"
        f" median: {rdf['abs_gap_pct'].median():.1f}%"
    )

    return rdf
def main():
    """
    Command-line entry point for developer mode.

    ``--difficulty AREA`` and ``--dual AREA`` run a single-area report
    and exit; otherwise the LP check / relaxation cascade is run for
    ``--scope``. The target directory for the single-area reports is
    chosen from ``--scope`` ('states'/'state' select the state
    directory, anything else the congressional-district directory).
    """
    parser = argparse.ArgumentParser(
        description="Developer mode — auto-relaxation for area weights",
    )
    parser.add_argument(
        "--scope",
        default="cds",
        help="'cds', 'states', or comma-separated area codes",
    )
    parser.add_argument(
        "--workers",
        type=int,
        default=1,
        help="Number of parallel workers (default: 1)",
    )
    # Boolean switches
    for flag, text in (
        ("--lp-only", "Only run LP feasibility check (no QP solve)"),
        ("--verbose", "Print per-area relaxation progress"),
    ):
        parser.add_argument(flag, action="store_true", help=text)
    # Single-area report selectors
    for flag, text in (
        ("--difficulty", "Print target difficulty table for a single area"),
        (
            "--dual",
            "Solve a single area and print dual (shadow price) analysis",
        ),
    ):
        parser.add_argument(flag, metavar="AREA", help=text)
    args = parser.parse_args()

    # Only the state keywords pick the state directory; every other
    # scope value falls back to congressional districts.
    if args.scope.lower() in ("states", "state"):
        tdir = STATE_TARGET_DIR
    else:
        tdir = CD_TARGET_DIR

    if args.difficulty:
        target_difficulty(args.difficulty, target_dir=tdir)
    elif args.dual:
        dual_analysis(args.dual, target_dir=tdir)
    else:
        run_developer_tools(
            scope=args.scope,
            num_workers=args.workers,
            lp_only=args.lp_only,
            verbose=args.verbose,
        )
429843, + "FL04": 433491, + "FL05": 426026, + "FL06": 447890, + "FL07": 450559, + "FL08": 445978, + "FL09": 419742, + "FL10": 440971, + "FL11": 437427, + "FL12": 439020, + "FL13": 464773, + "FL14": 456613, + "FL15": 440411, + "FL16": 431357, + "FL17": 438791, + "FL18": 428400, + "FL19": 446299, + "FL20": 447366, + "FL21": 435315, + "FL22": 454517, + "FL23": 460552, + "FL24": 461223, + "FL25": 444725, + "FL26": 453310, + "FL27": 465487, + "FL28": 444027, + "GA01": 414914, + "GA02": 411762, + "GA03": 415364, + "GA04": 430731, + "GA05": 459125, + "GA06": 414092, + "GA07": 418660, + "GA08": 406371, + "GA09": 408708, + "GA10": 417586, + "GA11": 425766, + "GA12": 408101, + "GA13": 434365, + "GA14": 405519, + "HI01": 422042, + "HI02": 410627, + "IA01": 434279, + "IA02": 439953, + "IA03": 435423, + "IA04": 435578, + "ID01": 471268, + "ID02": 485934, + "IL01": 434455, + "IL02": 431384, + "IL03": 435282, + "IL04": 435372, + "IL05": 447408, + "IL06": 423835, + "IL07": 457429, + "IL08": 423637, + "IL09": 435577, + "IL10": 413186, + "IL11": 404459, + "IL12": 414779, + "IL13": 423321, + "IL14": 409272, + "IL15": 416546, + "IL16": 421134, + "IL17": 423640, + "IN01": 426217, + "IN02": 413013, + "IN03": 411201, + "IN04": 414982, + "IN05": 412738, + "IN06": 417350, + "IN07": 425420, + "IN08": 417407, + "IN09": 419040, + "KS01": 394680, + "KS02": 401426, + "KS03": 404534, + "KS04": 399699, + "KY01": 400842, + "KY02": 398821, + "KY03": 438015, + "KY04": 409597, + "KY05": 395304, + "KY06": 411366, + "LA01": 424394, + "LA02": 439625, + "LA03": 411808, + "LA04": 412064, + "LA05": 413182, + "LA06": 416880, + "MA01": 470869, + "MA02": 463550, + "MA03": 449200, + "MA04": 443621, + "MA05": 462876, + "MA06": 454264, + "MA07": 504955, + "MA08": 476515, + "MA09": 468614, + "MD01": 436406, + "MD02": 446840, + "MD03": 449728, + "MD04": 458120, + "MD05": 452775, + "MD06": 436732, + "MD07": 443919, + "MD08": 438197, + "ME01": 415308, + "ME02": 398713, + "MI01": 450493, + "MI02": 433324, + "MI03": 
432942, + "MI04": 432069, + "MI05": 435214, + "MI06": 443257, + "MI07": 437987, + "MI08": 447470, + "MI09": 440049, + "MI10": 461251, + "MI11": 449926, + "MI12": 448048, + "MI13": 449795, + "MN01": 398052, + "MN02": 386445, + "MN03": 390772, + "MN04": 412284, + "MN05": 440711, + "MN06": 375953, + "MN07": 393773, + "MN08": 404209, + "MO01": 462147, + "MO02": 422738, + "MO03": 419587, + "MO04": 417402, + "MO05": 442901, + "MO06": 417771, + "MO07": 422233, + "MO08": 411215, + "MS01": 391595, + "MS02": 396809, + "MS03": 391610, + "MS04": 388894, + "MT01": 311046, + "MT02": 311047, + "NC01": 410584, + "NC02": 412984, + "NC03": 408002, + "NC04": 416399, + "NC05": 408343, + "NC06": 415789, + "NC07": 409797, + "NC08": 400188, + "NC09": 398431, + "NC10": 406214, + "NC11": 424824, + "NC12": 422371, + "NC13": 412267, + "NC14": 415351, + "ND01": 434512, + "NE01": 357842, + "NE02": 358349, + "NE03": 353855, + "NH01": 407919, + "NH02": 401257, + "NJ01": 440540, + "NJ02": 454036, + "NJ03": 435885, + "NJ04": 424150, + "NJ05": 424070, + "NJ06": 438680, + "NJ07": 430631, + "NJ08": 471993, + "NJ09": 443747, + "NJ10": 445813, + "NJ11": 425567, + "NJ12": 432508, + "NM01": 418058, + "NM02": 396218, + "NM03": 401593, + "NV01": 452936, + "NV02": 446496, + "NV03": 448640, + "NV04": 437791, + "NY01": 443415, + "NY02": 443025, + "NY03": 432780, + "NY04": 440481, + "NY05": 463044, + "NY06": 465715, + "NY07": 485033, + "NY08": 471676, + "NY09": 469424, + "NY10": 483442, + "NY11": 453847, + "NY12": 510438, + "NY13": 484994, + "NY14": 468420, + "NY15": 451571, + "NY16": 444898, + "NY17": 426134, + "NY18": 442672, + "NY19": 450677, + "NY20": 454970, + "NY21": 446438, + "NY22": 448189, + "NY23": 445111, + "NY24": 445198, + "NY25": 455800, + "NY26": 459473, + "OH01": 441349, + "OH02": 443013, + "OH03": 451095, + "OH04": 444119, + "OH05": 447523, + "OH06": 446565, + "OH07": 458665, + "OH08": 436926, + "OH09": 452634, + "OH10": 449918, + "OH11": 469941, + "OH12": 439617, + "OH13": 456704, + "OH14": 
463185, + "OH15": 446246, + "OK01": 423783, + "OK02": 412997, + "OK03": 416131, + "OK04": 421307, + "OK05": 428530, + "OR01": 410599, + "OR02": 398788, + "OR03": 427794, + "OR04": 409485, + "OR05": 405390, + "OR06": 405629, + "PA01": 428430, + "PA02": 437825, + "PA03": 476496, + "PA04": 426860, + "PA05": 437146, + "PA06": 424253, + "PA07": 443611, + "PA08": 447210, + "PA09": 435776, + "PA10": 440439, + "PA11": 426711, + "PA12": 463817, + "PA13": 435460, + "PA14": 442911, + "PA15": 436665, + "PA16": 437924, + "PA17": 447833, + "RI01": 333680, + "RI02": 336132, + "SC01": 412282, + "SC02": 403497, + "SC03": 396273, + "SC04": 402237, + "SC05": 393987, + "SC06": 418917, + "SC07": 423707, + "SD01": 489968, + "TN01": 429528, + "TN02": 427755, + "TN03": 422864, + "TN04": 416543, + "TN05": 432945, + "TN06": 426905, + "TN07": 423475, + "TN08": 409878, + "TN09": 434275, + "TX01": 405615, + "TX02": 406859, + "TX03": 385059, + "TX04": 384980, + "TX05": 413555, + "TX06": 411699, + "TX07": 418126, + "TX08": 403595, + "TX09": 417794, + "TX10": 410931, + "TX11": 401542, + "TX12": 405188, + "TX13": 407924, + "TX14": 409232, + "TX15": 393783, + "TX16": 414225, + "TX17": 410879, + "TX18": 427549, + "TX19": 410533, + "TX20": 423297, + "TX21": 437067, + "TX22": 386506, + "TX23": 393733, + "TX24": 421059, + "TX25": 404469, + "TX26": 390645, + "TX27": 413454, + "TX28": 404294, + "TX29": 407831, + "TX30": 424230, + "TX31": 393198, + "TX32": 423367, + "TX33": 417349, + "TX34": 395962, + "TX35": 431052, + "TX36": 400136, + "TX37": 419371, + "TX38": 415980, + "UT01": 403262, + "UT02": 437168, + "UT03": 416352, + "UT04": 419015, + "VA01": 422240, + "VA02": 448240, + "VA03": 445846, + "VA04": 451591, + "VA05": 439114, + "VA06": 434556, + "VA07": 426702, + "VA08": 469660, + "VA09": 436546, + "VA10": 403115, + "VA11": 431021, + "VT01": 389489, + "WA01": 423776, + "WA02": 443123, + "WA03": 424145, + "WA04": 407911, + "WA05": 431803, + "WA06": 445212, + "WA07": 484589, + "WA08": 406671, + "WA09": 
445642, + "WA10": 428670, + "WI01": 422047, + "WI02": 434308, + "WI03": 418591, + "WI04": 429695, + "WI05": 418520, + "WI06": 424998, + "WI07": 417027, + "WI08": 414160, + "WV01": 492141, + "WV02": 502957, + "WY01": 315984 + }, + "weight_hash": "6b27202d43156eac" +} \ No newline at end of file diff --git a/tmd/areas/prepare/recipes/cd_solver_overrides.yaml b/tmd/areas/prepare/recipes/cd_solver_overrides.yaml new file mode 100644 index 00000000..9a2e14e4 --- /dev/null +++ b/tmd/areas/prepare/recipes/cd_solver_overrides.yaml @@ -0,0 +1,608 @@ +# Solver overrides — auto-generated by developer mode +# Edit manually only if automatic relaxation is insufficient + +_defaults: + multiplier_max: 50.0 + constraint_tol: 0.005 +al01: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +al02: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +al03: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +al04: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +al07: + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 +ar01: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - 
c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[75000.0,100000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[100000.0,200000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +ar04: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +az03: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e00300/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +az07: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[75000.0,100000.0)/fs=0 +ca01: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca09: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca11: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ca13: + drop_targets: + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - 
e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca15: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ca16: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ca20: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +ca21: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca22: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca23: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca25: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 
+ca30: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +ca33: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +ca35: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca39: + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +ca41: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca42: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca43: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ca44: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +ca52: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +fl09: + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +fl18: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +fl20: + drop_targets: + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +fl22: + 
drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +fl24: + drop_targets: + - e01500/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +fl26: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +fl27: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +fl28: + drop_targets: + - e00300/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +ga02: + multiplier_max: 100.0 + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +ga08: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ga12: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +ga13: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ga14: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - 
ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +hi02: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +il12: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +il17: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ks01: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ky01: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ky02: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ky05: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e00300/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 +la02: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +la03: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +la04: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +la05: + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - 
ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +md04: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +md05: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +me02: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +mn07: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +mn08: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ms01: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[75000.0,100000.0)/fs=0 +ms02: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 +ms03: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +ms04: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +nc01: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ne03: + drop_targets: + - 
c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ny05: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ny06: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +ny12: + multiplier_max: 100.0 + drop_targets: + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[100000.0,200000.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - e01500/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - e01500/cnt=0/scope=1/agi=[50000.0,75000.0)/fs=0 +ny13: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +ny15: + drop_targets: + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +ok02: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +or02: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +or03: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +or04: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +pa02: + multiplier_max: 100.0 + drop_targets: + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - 
e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 + - e00300/cnt=0/scope=1/agi=[500000.0,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e01500/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 +sc03: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +sc06: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tn09: + drop_targets: + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +tx01: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx05: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx06: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx07: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx09: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx11: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 +tx15: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c18300/cnt=0/scope=1/agi=[200000.0,500000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[75000.0,100000.0)/fs=0 + - 
c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - e00200/cnt=2/scope=1/agi=[200000.0,500000.0)/fs=0 +tx16: + drop_targets: + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +tx18: + drop_targets: + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +tx20: + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - capgains_net/cnt=0/scope=1/agi=[100000.0,200000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +tx22: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx23: + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 +tx27: + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 +tx28: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - 
eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - capgains_net/cnt=0/scope=1/agi=[100000.0,200000.0)/fs=0 +tx29: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - capgains_net/cnt=0/scope=1/agi=[100000.0,200000.0)/fs=0 + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[200000.0,500000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 +tx30: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[200000.0,500000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[75000.0,100000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 +tx33: + drop_targets: + - e00300/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[1.0,10000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[-9e+99,1.0)/fs=0 + - ctc_total/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +tx34: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - e26270/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - e26270/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - eitc/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[50000.0,75000.0)/fs=0 + - e00200/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - 
e00200/cnt=2/scope=1/agi=[50000.0,75000.0)/fs=0 + - c00100/cnt=0/scope=1/agi=[25000.0,50000.0)/fs=0 + - e00200/cnt=2/scope=1/agi=[25000.0,50000.0)/fs=0 + - c00100/cnt=1/scope=1/agi=[50000.0,75000.0)/fs=0 +tx36: + multiplier_max: 100.0 + drop_targets: + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +wa07: + multiplier_max: 100.0 + constraint_tol: 0.01 + drop_targets: + - ctc_total/cnt=0/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[10000.0,25000.0)/fs=0 + - eitc/cnt=2/scope=1/agi=[1.0,10000.0)/fs=0 +wa08: + drop_targets: + - e18400/cnt=0/scope=1/agi=[-9e+99,9e+99)/fs=0 +wv01: + drop_targets: + - c00100/cnt=1/scope=1/agi=[500000.0,9e+99)/fs=0 diff --git a/tmd/areas/prepare/target_file_writer.py b/tmd/areas/prepare/target_file_writer.py index 37dc9b93..45177ceb 100644 --- a/tmd/areas/prepare/target_file_writer.py +++ b/tmd/areas/prepare/target_file_writer.py @@ -20,6 +20,7 @@ from tmd.areas.prepare.constants import ( ALLCOUNT_VARS, + CD_NUM_AGI_STUBS, STATE_NUM_AGI_STUBS, ) @@ -132,21 +133,39 @@ def _build_match_frame( target_stubs["_merge"] == "left_only" ].drop(columns=["_merge"]) - # Add sort numbers and XTOT row + # Add sort numbers, XTOT row, and total count rows target_stubs["sort"] = range(2, len(target_stubs) + 2) - xtot = pd.DataFrame( - [ - { - "varname": "XTOT", - "scope": 0, - "count": 0, - "fstatus": 0, - "agistub": 0, - "sort": 1, - } - ] - ) - target_stubs = pd.concat([xtot, target_stubs], ignore_index=True) + # XTOT is always first + extra_rows = [ + { + "varname": "XTOT", + "scope": 0, + "count": 0, + "fstatus": 0, + "agistub": 0, + "sort": 1, + } + ] + # Add agistub=0 (total) rows for count targets that either + # have filing-status breakdowns or have all bins excluded + # (indicating they want only the total). 
+ if recipe.get("include_totals", False): + next_sort = len(target_stubs) + 2 + for _, row in target_rules.iterrows(): + if row["count"] == 1: + extra_rows.append( + { + "varname": row["varname"], + "scope": row["scope"], + "count": int(row["count"]), + "fstatus": int(row["fstatus"]), + "agistub": 0, + "sort": next_sort, + } + ) + next_sort += 1 + extras = pd.DataFrame(extra_rows) + target_stubs = pd.concat([extras, target_stubs], ignore_index=True) # Cross variable mapping with count values 0-4 counts_df = pd.DataFrame({"count": range(5)}) @@ -229,6 +248,8 @@ def write_area_target_files( areatype = recipe["areatype"] if areatype == "state": top_agistub = STATE_NUM_AGI_STUBS + elif areatype == "cd": + top_agistub = CD_NUM_AGI_STUBS else: raise ValueError(f"Unknown areatype: {areatype}") diff --git a/tmd/areas/prepare_targets.py b/tmd/areas/prepare_targets.py index 249ce3f7..111114b9 100644 --- a/tmd/areas/prepare_targets.py +++ b/tmd/areas/prepare_targets.py @@ -1,4 +1,4 @@ -# pylint: disable=import-outside-toplevel +# pylint: disable=import-outside-toplevel,inconsistent-quotes """ Prepare area target files from SOI data and TMD national totals. @@ -249,10 +249,9 @@ def prepare_targets_from_spec( # 2. Read shares shares = pd.read_csv(shares_path) - n_areas = shares["area"].nunique() print( f"Shares: {len(shares):,} rows," - f" {n_areas} areas" + f" {shares['area'].nunique()} areas" f" from {shares_path.name}" ) @@ -260,8 +259,7 @@ def prepare_targets_from_spec( if scope_lower not in ("cds", "states", "all"): codes = [c.strip().upper() for c in scope.split(",")] shares = shares[shares["area"].isin(codes)] - n_filtered = shares["area"].nunique() - print(f" Filtered to {n_filtered} areas") + print(f" Filtered to {shares['area'].nunique()} areas") # 3. 
Compute TMD national sums (base + extended) all_mappings = ALL_SHARING_MAPPINGS + EXTENDED_SHARING_MAPPINGS diff --git a/tmd/areas/quality_report.py b/tmd/areas/quality_report.py index ef4d6b69..a1269e53 100644 --- a/tmd/areas/quality_report.py +++ b/tmd/areas/quality_report.py @@ -12,7 +12,7 @@ - Bystander checks (untargeted variables + per-bin analysis) Usage: - python -m tmd.areas.quality_report + python -m tmd.areas.quality_report --scope states python -m tmd.areas.quality_report --scope cds python -m tmd.areas.quality_report --scope cds --output python -m tmd.areas.quality_report --scope CA,WY -o report.txt @@ -410,8 +410,7 @@ def generate_report( avg_time = solved["solve_time"].mean() lines.append( f"Cumulative solve time: {cum_time:.0f}s" - f" (avg {avg_time:.1f}s per area;" - f" ~{cum_time / 16:.0f}s wall @ 16 workers)" + f" (avg {avg_time:.1f}s per area)" ) lines.append("") @@ -539,7 +538,7 @@ def generate_report( display_df = df else: lines.append( - "PER-AREA DETAIL (top 20 by violations / weight distortion):" + "PER-AREA DETAIL" + " (top 20 by violations / weight distortion):" ) # Always include failed areas, then sort solved by # violations desc, then weight RMSE desc @@ -899,8 +898,8 @@ def _weight_distribution_by_stub( return lines -def _weight_diagnostics( - _areas, _weight_dir, target_dir, tmd, s006, state_weights, n_loaded +def _weight_diagnostics( # pylint: disable=unused-argument + areas, weight_dir, target_dir, tmd, s006, state_weights, n_loaded ): """ Combined weight diagnostics: exhaustion + national aggregation. @@ -1017,7 +1016,8 @@ def _weight_diagnostics( f" for SELECTED VARIABLES ({n_loaded} areas):" ) lines.append( - " Do area weights preserve national totals? Diff% near 0 = good." + " Do area weights preserve national totals?" + + " Diff% near 0 = good." 
) lines.append( f" {'Variable':<30} {'National':>16}" diff --git a/tmd/areas/solve_weights.py b/tmd/areas/solve_weights.py index 2f085368..0207a765 100644 --- a/tmd/areas/solve_weights.py +++ b/tmd/areas/solve_weights.py @@ -1,25 +1,25 @@ # pylint: disable=import-outside-toplevel """ -Solve for state weights using Clarabel QP optimizer. +Solve for area weights using Clarabel QP optimizer. -Reads per-state target CSV files (produced by prepare_targets.py) +Reads per-area target CSV files (produced by prepare_targets.py) and runs the Clarabel constrained QP solver to find weight multipliers that hit area-specific targets within tolerance. Optional exhaustion limiting (--max-exhaustion) runs a two-pass solve: first unconstrained, then with per-record multiplier caps -to keep cross-state weight exhaustion within bounds. +to keep cross-area weight exhaustion within bounds. Usage: # All states, 8 parallel workers: python -m tmd.areas.solve_weights --scope states --workers 8 - # With exhaustion cap of 5x: - python -m tmd.areas.solve_weights --scope states --workers 8 \ - --max-exhaustion 5 + # All congressional districts: + python -m tmd.areas.solve_weights --scope cds --workers 16 - # Specific states: + # Specific areas: python -m tmd.areas.solve_weights --scope MN,CA,TX --workers 4 + python -m tmd.areas.solve_weights --scope MN01,CA52 --workers 4 """ import argparse @@ -30,6 +30,9 @@ from tmd.areas.create_area_weights import ( AREA_MULTIPLIER_MAX, + CD_MULTIPLIER_MAX, + CD_TARGET_DIR, + CD_WEIGHT_DIR, STATE_TARGET_DIR, STATE_WEIGHT_DIR, ) @@ -39,6 +42,12 @@ _MAX_EXHAUST_ITERATIONS = 5 +def _fmt_time(seconds): + """Format seconds as '1034.4s (17m 14s)'.""" + m, s = divmod(seconds, 60) + return f"{seconds:.1f}s ({int(m)}m {s:.0f}s)" + + def solve_state_weights( scope="states", num_workers=1, @@ -82,7 +91,7 @@ def solve_state_weights( if max_exhaustion is None: elapsed = time.time() - t0 - print(f"Total solve time: {elapsed:.1f}s") + print(f"Total solve time: 
{_fmt_time(elapsed)}") return # --- Exhaustion-limited iterative passes --- @@ -138,7 +147,47 @@ def solve_state_weights( ) elapsed = time.time() - t0 - print(f"Total solve time: {elapsed:.1f}s") + print(f"Total solve time: {_fmt_time(elapsed)}") + + +def solve_cd_weights( + scope="cds", + num_workers=1, + force=True, +): + """ + Run the Clarabel solver for congressional districts. + + Parameters + ---------- + scope : str + 'cds' or comma-separated CD codes (e.g., 'MN01,CA52'). + num_workers : int + Number of parallel worker processes. + force : bool + Recompute all areas even if weight files are up-to-date. + """ + from tmd.areas.batch_weights import run_batch + + specific = _parse_cd_scope(scope) + if specific: + area_filter = ",".join(a.lower() for a in specific) + else: + area_filter = "cds" + + t0 = time.time() + print("Solving CD weights...") + run_batch( + num_workers=num_workers, + area_filter=area_filter, + force=force, + target_dir=CD_TARGET_DIR, + weight_dir=CD_WEIGHT_DIR, + multiplier_max=CD_MULTIPLIER_MAX, + ) + + elapsed = time.time() - t0 + print(f"Total solve time: {_fmt_time(elapsed)}") def _compute_exhaustion(weight_dir): @@ -250,15 +299,39 @@ def _parse_scope(scope): return [c for c in codes if len(c) == 2 and c not in _EXCLUDE] +def _parse_cd_scope(scope): + """Parse scope string into list of CD codes or None.""" + scope_lower = scope.lower().strip() + if scope_lower in ("cds", "all"): + return None + codes = [c.strip().upper() for c in scope.split(",") if c.strip()] + return [c for c in codes if len(c) > 2] + + +def _is_cd_scope(scope): + """Return True if the scope refers to CDs rather than states.""" + scope_lower = scope.lower().strip() + if scope_lower == "cds": + return True + if scope_lower == "states": + return False + # Comma-separated: check first code length + first = scope.split(",")[0].strip() + return len(first) > 2 + + def main(): """CLI entry point.""" parser = argparse.ArgumentParser( - description=("Solve for state weights 
using Clarabel QP optimizer"), + description=("Solve for area weights using Clarabel QP optimizer"), ) parser.add_argument( "--scope", default="states", - help=("'states' or comma-separated state codes" " (e.g., 'MN,CA,TX')"), + help=( + "'states', 'cds', or comma-separated area codes" + " (e.g., 'MN,CA,TX' or 'MN01,CA52')" + ), ) parser.add_argument( "--workers", @@ -277,18 +350,25 @@ def main(): type=float, default=None, help=( - "Max per-record cross-state weight exhaustion" + "Max per-record cross-area weight exhaustion" " (e.g., 5.0). Runs iterative solve to enforce." ), ) args = parser.parse_args() - solve_state_weights( - scope=args.scope, - num_workers=args.workers, - force=args.force, - max_exhaustion=args.max_exhaustion, - ) + if _is_cd_scope(args.scope): + solve_cd_weights( + scope=args.scope, + num_workers=args.workers, + force=args.force, + ) + else: + solve_state_weights( + scope=args.scope, + num_workers=args.workers, + force=args.force, + max_exhaustion=args.max_exhaustion, + ) if __name__ == "__main__":