From 677cca92f0d22a5efb96552a3bca34b769cc9e5f Mon Sep 17 00:00:00 2001
From: Don Boyd <donboyd5@gmail.com>
Date: Tue, 24 Mar 2026 13:04:11 -0400
Subject: [PATCH] Enhance quality report: auto-save, multiplier histograms,
 bystander analysis

Major expansion of quality_report.py:
- --output flag to auto-save report to file
- Scope/timestamp header with solve time summary
- Aggregate multiplier distribution across all areas
- Weight distribution by AGI stub (national vs sum-of-areas)
- Per-bin bystander analysis (targeted vs untargeted distortion)
- Top-N per-area detail for large area sets (CDs, counties)
- Auto-discover areas from log files
- Support --scope cds

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tmd/areas/quality_report.py | 825 ++++++++++++++++++++++++++++++++----
 1 file changed, 748 insertions(+), 77 deletions(-)

diff --git a/tmd/areas/quality_report.py b/tmd/areas/quality_report.py
index b18837df..ef4d6b69 100644
--- a/tmd/areas/quality_report.py
+++ b/tmd/areas/quality_report.py
@@ -1,21 +1,27 @@
-# pylint: disable=import-outside-toplevel,inconsistent-quotes
+# pylint: disable=import-outside-toplevel,inconsistent-quotes,too-many-lines
 """
-Cross-state quality summary report.
+Cross-area quality summary report.
 
-Parses solver logs for all states and produces a summary showing:
+Parses solver logs for all areas and produces a summary showing:
   - Solve status and timing
   - Target accuracy (hit rate, mean/max error)
   - Weight distortion (RMSE, percentiles)
+  - Aggregate multiplier distribution (old vs new weight comparison)
   - Violated targets by variable
-  - Weight exhaustion and cross-state aggregation diagnostics
+  - Weight exhaustion and cross-area aggregation diagnostics
+  - Bystander checks (untargeted variables + per-bin analysis)
 
 Usage:
     python -m tmd.areas.quality_report
-    python -m tmd.areas.quality_report --scope CA,WY
+    python -m tmd.areas.quality_report --scope cds
+    python -m tmd.areas.quality_report --scope cds --output
+    python -m tmd.areas.quality_report --scope CA,WY -o report.txt
+    python -m tmd.areas.quality_report --scope MN01,MN02
 """
 
 import argparse
 import re
+from datetime import datetime
 from pathlib import Path
 
 import numpy as np
@@ -23,9 +29,11 @@
 
 from tmd.areas.create_area_weights import (
     AREA_CONSTRAINT_TOL,
+    CD_TARGET_DIR,
+    CD_WEIGHT_DIR,
+    STATE_TARGET_DIR,
     STATE_WEIGHT_DIR,
 )
-from tmd.areas.prepare.constants import ALL_STATES
 from tmd.imputation_assumptions import TAXYEAR
 
 _WT_COL = f"WT{TAXYEAR}"
@@ -77,6 +85,15 @@ def _humanize_desc(desc: str) -> str:
     return " ".join(pieces)
 
 
+def _fmt_agi_bin(lo, hi):
+    """Format an AGI bin range for display."""
+    if lo < -1e10:
+        return "<$0"
+    if hi > 1e10:
+        return f"${lo / 1000:.0f}K+"
+    return f"${lo / 1000:.0f}K-${hi / 1000:.0f}K"
+
+
 def parse_log(logpath: Path) -> dict:
     """Parse a single area solver log file into a summary dict."""
     if not logpath.exists():
@@ -92,6 +109,11 @@ def parse_log(logpath: Path) -> dict:
     if "PrimalInfeasible" in log or "FAILED" in log:
         result["status"] = "FAILED"
 
+    # Population share
+    m = re.search(r"pop_share = ([\d.]+) / ([\d.]+) = ([\d.]+)", log)
+    if m:
+        result["pop_share"] = float(m.group(3))
+
     # Solve time
     m = re.search(r"Solve time: ([\d.]+)s", log)
     if m:
@@ -174,18 +196,174 @@ def parse_log(logpath: Path) -> dict:
     return result
 
 
-def generate_report(areas=None, weight_dir=None):
-    """Generate cross-state quality summary report."""
-    if areas is None:
-        areas = ALL_STATES
+def _list_areas_from_logs(weight_dir):
+    """Return sorted list of area codes from log files."""
+    logs = sorted(weight_dir.glob("*.log"))
+    return [lp.stem for lp in logs]
+
+
+def _infer_target_dir(weight_dir):
+    """Infer target directory from weight directory."""
+    # weight_dir is like .../weights/states or .../weights/cds
+    return weight_dir.parent.parent / "targets" / weight_dir.name
+
+
+def _aggregate_multiplier_histogram(solved_df):
+    """
+    Aggregate per-area multiplier distributions into a combined view.
+
+    The multiplier x = area_weight / (pop_share * national_weight).
+    x = 1.0 means the record keeps its population-proportional share.
+    This is the side-by-side comparison of initial (x=1) vs optimized
+    weights.
+    """
+    lines = []
+
+    # Canonical bin order matching the solver's histogram
+    # Note: exact zeros are logged as [0.0000, 0.0000) → key (0.0, 0.0)
+    canonical_bins = [
+        (0.0, 0.0),
+        (0.0, 0.1),
+        (0.1, 0.5),
+        (0.5, 0.8),
+        (0.8, 0.9),
+        (0.9, 0.95),
+        (0.95, 1.0),
+        (1.0, 1.05),
+        (1.05, 1.1),
+        (1.1, 1.2),
+        (1.2, 1.5),
+        (1.5, 2.0),
+        (2.0, 5.0),
+        (5.0, 10.0),
+        (10.0, 100.0),
+        (100.0, np.inf),
+    ]
+    # Aggregate counts from all areas
+    agg = {}
+    total_records = 0
+    n_areas = 0
+    for _, row in solved_df.iterrows():
+        dist = row.get("dist_bins", {})
+        n_rec = row.get("n_records", 0)
+        if not dist or n_rec == 0:
+            continue
+        n_areas += 1
+        total_records += n_rec
+        for key, val in dist.items():
+            agg[key] = agg.get(key, 0) + val["count"]
+
+    if total_records == 0:
+        return lines
+
+    n_per_area = total_records // n_areas if n_areas > 0 else 0
+    lines.append(
+        "AGGREGATE MULTIPLIER DISTRIBUTION"
+        f" ({n_areas} areas x {n_per_area:,}"
+        f" records = {total_records:,} area-record pairs):"
+    )
+    lines.append(
+        "  The multiplier x = area_weight / (pop_share * national_weight)."
+    )
+    lines.append(
+        "  x = 1.0 means the record keeps its population-proportional"
+        " share."
+    )
+    lines.append(
+        "  Initial (pre-optimization): all x = 1.0."
+        " This histogram shows the optimized distribution."
+    )
+
+    bin_labels = {
+        (0.0, 0.0): "x = 0 (excluded)",
+        (0.0, 0.1): "(0, 0.1)",
+        (0.1, 0.5): "[0.1, 0.5)",
+        (0.5, 0.8): "[0.5, 0.8)",
+        (0.8, 0.9): "[0.8, 0.9)",
+        (0.9, 0.95): "[0.9, 0.95)",
+        (0.95, 1.0): "[0.95, 1.0)",
+        (1.0, 1.05): "[1.0, 1.05)",
+        (1.05, 1.1): "[1.05, 1.1)",
+        (1.1, 1.2): "[1.1, 1.2)",
+        (1.2, 1.5): "[1.2, 1.5)",
+        (1.5, 2.0): "[1.5, 2.0)",
+        (2.0, 5.0): "[2.0, 5.0)",
+        (5.0, 10.0): "[5.0, 10.0)",
+        (10.0, 100.0): "[10.0, 100.0)",
+        (100.0, np.inf): "[100.0, inf)",
+    }
+
+    lines.append(f"  {'Bin':<20} {'Count':>12} {'Pct':>8} {'CumPct':>8}")
+    lines.append("  " + "-" * 50)
+    cum_pct = 0.0
+    for bkey in canonical_bins:
+        cnt = agg.get(bkey, 0)
+        if cnt == 0:
+            continue
+        pct = 100.0 * cnt / total_records
+        cum_pct += pct
+        label = bin_labels.get(bkey, str(bkey))
+        lines.append(f"  {label:<20} {cnt:>12,} {pct:>7.1f}% {cum_pct:>7.1f}%")
+
+    # Summary stats
+    n_near_one = sum(agg.get(b, 0) for b in [(0.95, 1.0), (1.0, 1.05)])
+    n_within_20 = sum(
+        agg.get(b, 0)
+        for b in [
+            (0.8, 0.9),
+            (0.9, 0.95),
+            (0.95, 1.0),
+            (1.0, 1.05),
+            (1.05, 1.1),
+            (1.1, 1.2),
+        ]
+    )
+    n_excluded = agg.get((0.0, 0.0), 0)
+    n_extreme = sum(
+        agg.get(b, 0) for b in [(5.0, 10.0), (10.0, 100.0), (100.0, np.inf)]
+    )
+    lines.append("")
+    lines.append(
+        f"  Within +/-5% of 1.0:"
+        f" {n_near_one:,} ({100 * n_near_one / total_records:.1f}%)"
+    )
+    lines.append(
+        f"  Within +/-20% of 1.0:"
+        f" {n_within_20:,} ({100 * n_within_20 / total_records:.1f}%)"
+    )
+    lines.append(
+        f"  Excluded (x=0):"
+        f" {n_excluded:,} ({100 * n_excluded / total_records:.1f}%)"
+    )
+    if n_extreme > 0:
+        lines.append(
+            f"  Extreme (x>=5.0):"
+            f" {n_extreme:,} ({100 * n_extreme / total_records:.1f}%)"
+        )
+    lines.append("")
+
+    return lines
+
+
+def generate_report(
+    areas=None,
+    weight_dir=None,
+    target_dir=None,
+    scope_label=None,
+):
+    """Generate cross-area quality summary report."""
     if weight_dir is None:
         weight_dir = STATE_WEIGHT_DIR
+    if target_dir is None:
+        target_dir = _infer_target_dir(weight_dir)
+    if areas is None:
+        areas = _list_areas_from_logs(weight_dir)
 
     rows = []
     for st in areas:
         logpath = weight_dir / f"{st.lower()}.log"
         info = parse_log(logpath)
-        info["state"] = st
+        info["area"] = st
         rows.append(info)
 
     df = pd.DataFrame(rows)
@@ -193,7 +371,6 @@ def generate_report(areas=None, weight_dir=None):
     # Summary statistics
     solved = df[df["status"].isin(["Solved", "AlmostSolved"])]
     failed = df[df["status"] == "FAILED"]
-    n_states = len(df)
     n_solved = len(solved)
     n_failed = len(failed)
     n_violated_states = (solved["n_violated"] > 0).sum()
@@ -203,40 +380,57 @@ def generate_report(areas=None, weight_dir=None):
 
     lines = []
     lines.append("=" * 80)
-    lines.append("CROSS-STATE QUALITY SUMMARY REPORT")
+    header = "CROSS-AREA QUALITY SUMMARY REPORT"
+    if scope_label:
+        header += f"  [{scope_label}]"
+    lines.append(header)
+    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
     lines.append("=" * 80)
     lines.append("")
 
     # Overall
-    lines.append(f"States: {n_states}")
+    n_areas = len(df)
+    lines.append(f"Areas: {n_areas}")
     lines.append(f"Solved: {n_solved}")
     lines.append(f"Failed: {n_failed}")
     if n_failed > 0:
-        lines.append(f"  Failed: {', '.join(failed['state'].tolist())}")
+        lines.append(f"  Failed: {', '.join(failed['area'].tolist())}")
     lines.append(
-        f"States with violated targets: {n_violated_states}/{n_solved}"
+        f"Areas with violated targets: {n_violated_states}/{n_solved}"
     )
     if not solved.empty and "targets_total" in solved.columns:
         tpt = int(solved["targets_total"].iloc[0])
         tpt_sum = int(solved["targets_total"].sum())
     else:
         tpt, tpt_sum = "?", "?"
-    lines.append(f"Total targets: {n_solved} states \u00d7 {tpt} = {tpt_sum}")
+    lines.append(f"Total targets: {n_solved} areas \u00d7 {tpt} = {tpt_sum}")
     lines.append(f"Total violated targets: {int(total_violated)}")
+    if not solved.empty and "solve_time" in solved.columns:
+        cum_time = solved["solve_time"].sum()
+        avg_time = solved["solve_time"].mean()
+        lines.append(
+            f"Cumulative solve time: {cum_time:.0f}s"
+            f" (avg {avg_time:.1f}s per area;"
+            f" ~{cum_time / 16:.0f}s wall @ 16 workers)"
+        )
     lines.append("")
 
     # Target accuracy
     if not solved.empty and "mean_err" in solved.columns:
         lines.append("TARGET ACCURACY:")
         lines.append(
-            f"  Per-state mean error: "
-            f"avg across states={solved['mean_err'].mean():.4f}, "
-            f"worst state={solved['mean_err'].max():.4f}"
+            "  Error = |achieved - target| / |target|."
+            "  A target is 'hit' if error < tolerance."
         )
         lines.append(
-            f"  Per-state max error:  "
-            f"avg across states={solved['max_err'].mean():.4f}, "
-            f"worst state={solved['max_err'].max():.4f}"
+            f"  Per-area mean error:"
+            f"avg={solved['mean_err'].mean():.4f}, "
+            f"worst={solved['mean_err'].max():.4f}"
+        )
+        lines.append(
+            f"  Per-area max error: "
+            f"avg={solved['max_err'].mean():.4f}, "
+            f"worst={solved['max_err'].max():.4f}"
         )
         if "targets_hit" in solved.columns:
             total_t = solved["targets_total"].iloc[0]
@@ -253,6 +447,14 @@ def generate_report(areas=None, weight_dir=None):
     # Weight distortion
     if not solved.empty and "w_rmse" in solved.columns:
         lines.append("WEIGHT DISTORTION (multiplier from 1.0):")
+        lines.append(
+            "  Each record's multiplier x = area_weight"
+            " / (pop_share * national_weight)."
+        )
+        lines.append(
+            "  x=1.0 means population-proportional."
+            "  RMSE measures overall departure from x=1."
+        )
         lines.append(
             f"  RMSE:   avg={solved['w_rmse'].mean():.3f}, "
             f"max={solved['w_rmse'].max():.3f}"
@@ -280,6 +482,10 @@ def generate_report(areas=None, weight_dir=None):
         )
         lines.append("")
 
+    # Aggregate multiplier distribution (old vs new weight comparison)
+    if not solved.empty:
+        lines.extend(_aggregate_multiplier_histogram(solved))
+
     # Near-zero weight summary
     if not solved.empty:
         zero_pcts = []
@@ -295,6 +501,10 @@ def generate_report(areas=None, weight_dir=None):
             lt01_pcts.append(100 * n_lt01 / n_rec)
         if zero_pcts:
             lines.append("NEAR-ZERO WEIGHT MULTIPLIERS (% of records):")
+            lines.append(
+                "  Records with x near 0 are effectively"
+                " excluded from the area."
+            )
             lines.append(
                 f"  Exact zero (x=0):  "
                 f"avg={np.mean(zero_pcts):.1f}%, "
@@ -307,21 +517,68 @@ def generate_report(areas=None, weight_dir=None):
             )
             lines.append("")
 
-    # Per-state table
-    lines.append("PER-STATE DETAIL:")
+    # Load TMD data and weight files (once, shared by all diagnostics)
+    report_data = _load_report_data(areas, weight_dir)
+    if report_data is not None:
+        tmd, s006, state_weights, n_loaded = report_data
+        # Weight distribution by AGI stub (old vs new)
+        lines.extend(
+            _weight_distribution_by_stub(
+                tmd, s006, state_weights, n_loaded, target_dir
+            )
+        )
+
+    # Per-area table
+    # For small area counts (states), show all areas.
+    # For large area counts (CDs, counties), show top 20 by max error.
+    _DETAIL_CUTOFF = 60
+    show_all = n_areas <= _DETAIL_CUTOFF
+
+    if show_all:
+        lines.append("PER-AREA DETAIL:")
+        display_df = df
+    else:
+        lines.append(
+            "PER-AREA DETAIL (top 20 by violations / weight distortion):"
+        )
+        # Always include failed areas, then sort solved by
+        # violations desc, then weight RMSE desc
+        failed_rows = df[df["status"] == "FAILED"]
+        solved_rows = df[df["status"].isin(["Solved", "AlmostSolved"])].copy()
+        solved_rows["_viol"] = solved_rows["n_violated"].fillna(0)
+        solved_rows["_rmse"] = solved_rows["w_rmse"].fillna(0)
+        top_solved = solved_rows.sort_values(
+            ["_viol", "_rmse"], ascending=[False, False]
+        ).head(20)
+        top_solved = top_solved.drop(columns=["_viol", "_rmse"])
+        display_df = pd.concat([failed_rows, top_solved]).drop_duplicates(
+            subset=["area"]
+        )
+
     lines.append(
-        "  Err cols = |relative error| (fraction); "
-        "weight cols = multiplier on national weight (1.0 = unchanged)"
+        "  MeanErr/MaxErr = |relative error| (fraction)."
+        "  wRMSE = root-mean-square deviation of"
+        " multipliers from 1.0"
     )
+    lines.append(
+        "  (higher = more distortion)."
+        "  wP05/wMed/wP95/wMax = multiplier percentiles."
+        "  %zero = records excluded."
+    )
+    max_id = max(
+        (len(str(row["area"])) for _, row in display_df.iterrows()),
+        default=4,
+    )
+    id_w = max(max_id + 1, 5)
     header = (
-        f"{'St':<4} {'Status':<14} {'Hit':>5} {'Tot':>5} "
+        f"{'Area':<{id_w}} {'Status':<14} {'Hit':>5} {'Tot':>5} "
         f"{'Viol':>5} {'MeanErr':>8} {'MaxErr':>8} "
         f"{'wRMSE':>7} {'wP05':>7} {'wMed':>7} "
         f"{'wP95':>7} {'wMax':>8} {'%zero':>6}"
     )
     lines.append(header)
     lines.append("-" * len(header))
-    for _, row in df.iterrows():
+    for _, row in display_df.iterrows():
         hit = int(row.get("targets_hit", 0))
         tot = int(row.get("targets_total", 0))
         viol = int(row.get("n_violated", 0))
@@ -338,12 +595,16 @@ def generate_report(areas=None, weight_dir=None):
         if isinstance(dist, dict):
             n_zero = dist.get((0.0, 0.0), {}).get("count", 0)
         pct_zero = 100 * n_zero / n_rec if n_rec > 0 else 0
+        area_str = str(row["area"])
         lines.append(
-            f"{row['state']:<4} {row['status']:<14} {hit:>5} {tot:>5} "
+            f"{area_str:<{id_w}} {row['status']:<14} {hit:>5} {tot:>5} "
             f"{viol:>5} {me:>8.4f} {mx:>8.4f} "
             f"{rmse:>7.3f} {p5:>7.3f} {med:>7.3f} "
             f"{p95:>7.3f} {wmax:>8.1f} {pct_zero:>5.1f}%"
         )
+    if not show_all:
+        n_omitted = n_areas - len(display_df)
+        lines.append(f"  ... {n_omitted} areas omitted (all within tolerance)")
     lines.append("")
 
     # Violated targets by variable
@@ -357,7 +618,7 @@ def generate_report(areas=None, weight_dir=None):
             abs_miss = abs(v["achieved"] - v["target"])
             all_violated.append(
                 {
-                    "state": row["state"],
+                    "area": row["area"],
                     "varname": varname,
                     "cnt_type": cnt_type,
                     "pct_err": v["pct_err"],
@@ -372,16 +633,15 @@ def generate_report(areas=None, weight_dir=None):
         var_counts = vdf["varname"].value_counts()
         lines.append("VIOLATIONS BY VARIABLE:")
         for var, cnt in var_counts.items():
-            states_with = sorted(vdf[vdf["varname"] == var]["state"].unique())
+            areas_with = sorted(vdf[vdf["varname"] == var]["area"].unique())
             lines.append(
-                f"  {var}: {cnt} violations across "
-                f"{len(states_with)} states"
+                f"  {var}: {cnt} violations across " f"{len(areas_with)} areas"
             )
         lines.append("")
 
-        state_counts = vdf["state"].value_counts().head(10)
-        lines.append("STATES WITH MOST VIOLATIONS:")
-        for st, cnt in state_counts.items():
+        area_counts = vdf["area"].value_counts().head(10)
+        lines.append("AREAS WITH MOST VIOLATIONS:")
+        for st, cnt in area_counts.items():
             lines.append(f"  {st}: {cnt} violated")
         lines.append("")
 
@@ -394,7 +654,7 @@ def generate_report(areas=None, weight_dir=None):
         else:
             for _, r in amt_viol.head(5).iterrows():
                 lines.append(
-                    f"  {r['state']:<4} {r['pct_err']:.3f}% "
+                    f"  {r['area']:<4} {r['pct_err']:.3f}% "
                     f"target=${r['target']:>15,.0f}  "
                     f"achieved=${r['achieved']:>15,.0f}  "
                     f"miss=${r['abs_miss']:>12,.0f}  "
@@ -411,7 +671,7 @@ def generate_report(areas=None, weight_dir=None):
         else:
             for _, r in cnt_viol.head(5).iterrows():
                 lines.append(
-                    f"  {r['state']:<4} {r['pct_err']:.3f}% "
+                    f"  {r['area']:<4} {r['pct_err']:.3f}% "
                     f"target={r['target']:>12,.0f}  "
                     f"achieved={r['achieved']:>12,.0f}  "
                     f"miss={r['abs_miss']:>8,.0f}  "
@@ -419,28 +679,37 @@ def generate_report(areas=None, weight_dir=None):
                 )
         lines.append("")
 
-    # Weight diagnostics
-    lines.extend(_weight_diagnostics(areas, weight_dir))
+    # Weight diagnostics (uses pre-loaded data if available)
+    if report_data is not None:
+        tmd, s006, state_weights, n_loaded = report_data
+        lines.extend(
+            _weight_diagnostics(
+                areas,
+                weight_dir,
+                target_dir,
+                tmd,
+                s006,
+                state_weights,
+                n_loaded,
+            )
+        )
 
     report = "\n".join(lines)
     return report
 
 
-def _weight_diagnostics(areas, weight_dir=None):
+def _load_report_data(areas, weight_dir):
     """
-    Combined weight diagnostics: exhaustion + national aggregation.
+    Load TMD data and area weight files for diagnostics.
 
-    Loads TMD data and state weight files once, reuses for both.
-    Only reads the specific columns needed (not the full allvars).
-    """
-    if weight_dir is None:
-        weight_dir = STATE_WEIGHT_DIR
+    Called once from generate_report(); results passed to all
+    diagnostic functions to avoid reloading.
 
+    Returns (tmd, s006, state_weights, n_loaded) or None if
+    no weight files found.
+    """
     from tmd.storage import STORAGE_FOLDER
 
-    lines = []
-
-    # Load national data
     tmd_path = STORAGE_FOLDER / "output" / "tmd.csv.gz"
     tmd_cols = [
         "RECID",
@@ -455,6 +724,7 @@ def _weight_diagnostics(areas, weight_dir=None):
         "e00650",
         "e00900",
         "e01400",
+        "e01500",
         "e01700",
         "e02000",
         "e02300",
@@ -468,7 +738,6 @@ def _weight_diagnostics(areas, weight_dir=None):
     ]
     tmd = pd.read_csv(tmd_path, usecols=tmd_cols)
     tmd["capgains_net"] = tmd["p22250"] + tmd["p23250"]
-    n_records = len(tmd)
     s006 = tmd["s006"].values
 
     agi_path = STORAGE_FOLDER / "output" / "cached_c00100.npy"
@@ -495,7 +764,7 @@ def _weight_diagnostics(areas, weight_dir=None):
             for col in load_tc:
                 tmd[col] = allvars[col].values
 
-    # Load all state weights once
+    n_records = len(tmd)
     weight_sum = np.zeros(n_records)
     state_weights = {}
     for st in areas:
@@ -508,17 +777,151 @@ def _weight_diagnostics(areas, weight_dir=None):
 
     n_loaded = len(state_weights)
     if n_loaded == 0:
+        return None
+    return tmd, s006, state_weights, n_loaded
+
+
+def _weight_distribution_by_stub(
+    tmd, s006, state_weights, n_loaded, target_dir
+):
+    """
+    Show weighted return counts and AGI by AGI stub:
+    national (old) vs sum-of-areas (new) and change.
+    """
+    lines = []
+
+    if "c00100" not in tmd.columns:
+        return lines
+
+    # Get AGI bins from target file
+    first_area = next(iter(state_weights.keys()), None)
+    if first_area is None:
+        return lines
+    tgt_path = target_dir / f"{first_area}_targets.csv"
+    if not tgt_path.exists():
+        return lines
+    tgt_df = pd.read_csv(tgt_path, comment="#")
+
+    agi_pairs = set()
+    for _, row in tgt_df.iterrows():
+        lo, hi = float(row.agilo), float(row.agihi)
+        if lo < -1e10 and hi > 1e10:
+            continue
+        agi_pairs.add((lo, hi))
+    agi_bins = sorted(agi_pairs)
+    if not agi_bins:
         return lines
 
+    c00100 = tmd["c00100"].values
+    lines.append(f"WEIGHT DISTRIBUTION BY AGI STUB ({n_loaded} areas):")
+    lines.append(
+        "  National = s006-weighted totals."
+        " Sum-of-Areas = area-weight-weighted totals."
+    )
+
+    # Header
+    lines.append(
+        f"  {'AGI Stub':<18}"
+        f" {'Natl Returns':>14} {'Area Returns':>14} {'Chg%':>7}"
+        f" {'Natl AGI ($B)':>14} {'Area AGI ($B)':>14} {'Chg%':>7}"
+    )
+    lines.append("  " + "-" * 92)
+
+    # Running totals
+    tot_nat_ret = 0.0
+    tot_area_ret = 0.0
+    tot_nat_agi = 0.0
+    tot_area_agi = 0.0
+
+    for lo, hi in agi_bins:
+        in_bin = (c00100 >= lo) & (c00100 < hi)
+        mask = in_bin.astype(float)
+
+        # Returns
+        nat_ret = float((s006 * mask).sum())
+        area_ret = sum(float((w * mask).sum()) for w in state_weights.values())
+        ret_chg = (area_ret / nat_ret - 1) * 100 if nat_ret else 0
+
+        # AGI
+        agi_vals = c00100 * mask
+        nat_agi = float((s006 * agi_vals).sum())
+        area_agi = sum(
+            float((w * agi_vals).sum()) for w in state_weights.values()
+        )
+        agi_chg = (area_agi / nat_agi - 1) * 100 if nat_agi else 0
+
+        tot_nat_ret += nat_ret
+        tot_area_ret += area_ret
+        tot_nat_agi += nat_agi
+        tot_area_agi += area_agi
+
+        bin_label = _fmt_agi_bin(lo, hi)
+        flag = " *" if abs(ret_chg) > 2 or abs(agi_chg) > 2 else ""
+        lines.append(
+            f"  {bin_label:<18}"
+            f" {nat_ret:>14,.0f} {area_ret:>14,.0f}"
+            f" {ret_chg:>+6.2f}%"
+            f" {nat_agi / 1e9:>14.1f} {area_agi / 1e9:>14.1f}"
+            f" {agi_chg:>+6.2f}%{flag}"
+        )
+
+    # Total row
+    tot_ret_chg = (tot_area_ret / tot_nat_ret - 1) * 100 if tot_nat_ret else 0
+    tot_agi_chg = (tot_area_agi / tot_nat_agi - 1) * 100 if tot_nat_agi else 0
+    lines.append("  " + "-" * 92)
+    lines.append(
+        f"  {'TOTAL':<18}"
+        f" {tot_nat_ret:>14,.0f} {tot_area_ret:>14,.0f}"
+        f" {tot_ret_chg:>+6.2f}%"
+        f" {tot_nat_agi / 1e9:>14.1f} {tot_area_agi / 1e9:>14.1f}"
+        f" {tot_agi_chg:>+6.2f}%"
+    )
+
+    n_flagged = 0
+    for lo, hi in agi_bins:
+        in_bin = (c00100 >= lo) & (c00100 < hi)
+        mask = in_bin.astype(float)
+        nat_ret = float((s006 * mask).sum())
+        area_ret = sum(float((w * mask).sum()) for w in state_weights.values())
+        ret_chg = (area_ret / nat_ret - 1) * 100 if nat_ret else 0
+        agi_vals = c00100 * mask
+        nat_agi = float((s006 * agi_vals).sum())
+        area_agi = sum(
+            float((w * agi_vals).sum()) for w in state_weights.values()
+        )
+        agi_chg = (area_agi / nat_agi - 1) * 100 if nat_agi else 0
+        if abs(ret_chg) > 2 or abs(agi_chg) > 2:
+            n_flagged += 1
+    if n_flagged:
+        lines.append(f"  * = {n_flagged} stubs with >2% change")
+    lines.append("")
+
+    return lines
+
+
+def _weight_diagnostics(
+    _areas, _weight_dir, target_dir, tmd, s006, state_weights, n_loaded
+):
+    """
+    Combined weight diagnostics: exhaustion + national aggregation.
+
+    Uses pre-loaded TMD data and weight files (from _load_report_data).
+    """
+    lines = []
+    n_records = len(tmd)
+
     # Weight exhaustion
+    weight_sum = np.zeros(n_records)
+    for w in state_weights.values():
+        weight_sum += w
     usage = weight_sum / s006
 
-    _sub = "sum of state weights / national weight"
+    _sub = "sum of area weights / national weight"
     lines.append(f"WEIGHT EXHAUSTION ({_sub}):")
     lines.append(
         f"  A ratio of 1.0 means the record's national"
         f" weight is fully allocated across"
-        f" {n_loaded} states."
+        f" {n_loaded} areas."
     )
     pcts = [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100]
     quantiles = np.percentile(usage, pcts)
@@ -575,8 +978,7 @@ def _weight_diagnostics(areas, weight_dir=None):
             f" ptshp=${r.e26270:,.0f}"
         )
         lines.append(
-            f"     top states: {top3}"
-            f" ({len(st_wts)} nonzero of {n_loaded})"
+            f"     top areas: {top3}" f" ({len(st_wts)} nonzero of {n_loaded})"
         )
     lines.append("")
 
@@ -611,37 +1013,49 @@ def _weight_diagnostics(areas, weight_dir=None):
                 state_sums[var] += float((w * tmd[var].values).sum())
 
     lines.append(
-        f"CROSS-STATE AGGREGATION vs NATIONAL TOTALS"
-        f" for SELECTED VARIABLES ({n_loaded} states):"
+        f"CROSS-AREA AGGREGATION vs NATIONAL TOTALS"
+        f" for SELECTED VARIABLES ({n_loaded} areas):"
+    )
+    lines.append(
+        "  Do area weights preserve national totals?  Diff% near 0 = good."
     )
     lines.append(
         f"  {'Variable':<30} {'National':>16}"
-        f" {'Sum-of-States':>16} {'Diff%':>8}"
+        f" {'Sum-of-Areas':>16} {'Diff':>14} {'Diff%':>8}"
     )
-    lines.append("  " + "-" * 72)
+    lines.append("  " + "-" * 86)
     for label, var, is_count in check_vars:
         nat = national[var]
         sos = state_sums[var]
         if nat is None or nat == 0:
             continue
+        diff = sos - nat
         diff_pct = (sos / nat - 1) * 100
         if is_count:
             lines.append(
                 f"  {label:<30} {nat:>16,.0f}"
-                f" {sos:>16,.0f} {diff_pct:>+7.2f}%"
+                f" {sos:>16,.0f}"
+                f" {diff:>+14,.0f}"
+                f" {diff_pct:>+7.2f}%"
             )
         else:
             lines.append(
                 f"  {label:<30}"
                 f" ${nat / 1e9:>14.1f}B"
                 f" ${sos / 1e9:>14.1f}B"
+                f" ${diff / 1e9:>+12.2f}B"
                 f" {diff_pct:>+7.2f}%"
             )
     lines.append("")
 
-    # Bystander check: untargeted variables
+    # Bystander check: untargeted variables (aggregate)
     lines.extend(_bystander_check(tmd, s006, state_weights, n_loaded))
 
+    # Bystander check: per-bin analysis for targeted + key untargeted vars
+    lines.extend(
+        _bystander_by_bin(tmd, s006, state_weights, n_loaded, target_dir)
+    )
+
     return lines
 
 
@@ -687,12 +1101,7 @@ def _bystander_check(tmd, s006, state_weights, n_loaded):
     for label, var, is_count in bystander_vars:
         if var not in tmd.columns:
             continue
-        if var == "XTOT":
-            nat = float((s006 * tmd[var].values).sum())
-        elif is_count:
-            nat = float((s006 * tmd[var].values).sum())
-        else:
-            nat = float((s006 * tmd[var].values).sum())
+        nat = float((s006 * tmd[var].values).sum())
         if abs(nat) < 1:
             continue
         sos = 0.0
@@ -705,7 +1114,7 @@ def _bystander_check(tmd, s006, state_weights, n_loaded):
     results.sort(key=lambda x: -abs(x[5]))
 
     lines.append(
-        "BYSTANDER CHECK: UNTARGETED VARIABLES" f" ({n_loaded} states):"
+        "BYSTANDER CHECK: UNTARGETED VARIABLES" f" ({n_loaded} areas):"
     )
     lines.append(
         "  Variables NOT directly targeted — distortion"
@@ -713,15 +1122,17 @@ def _bystander_check(tmd, s006, state_weights, n_loaded):
     )
     lines.append(
         f"  {'Variable':<30} {'National':>16}"
-        f" {'Sum-of-States':>16} {'Diff%':>8}"
+        f" {'Sum-of-Areas':>16} {'Diff':>14} {'Diff%':>8}"
     )
-    lines.append("  " + "-" * 72)
+    lines.append("  " + "-" * 86)
     for label, _var, is_count, nat, sos, diff_pct in results:
+        diff = sos - nat
         flag = " ***" if abs(diff_pct) > 2 else ""
         if is_count:
             lines.append(
                 f"  {label:<30} {nat:>16,.0f}"
                 f" {sos:>16,.0f}"
+                f" {diff:>+14,.0f}"
                 f" {diff_pct:>+7.2f}%{flag}"
             )
         else:
@@ -729,6 +1140,7 @@ def _bystander_check(tmd, s006, state_weights, n_loaded):
                 f"  {label:<30}"
                 f" ${nat / 1e9:>14.1f}B"
                 f" ${sos / 1e9:>14.1f}B"
+                f" ${diff / 1e9:>+12.2f}B"
                 f" {diff_pct:>+7.2f}%{flag}"
             )
 
@@ -748,24 +1160,283 @@ def _bystander_check(tmd, s006, state_weights, n_loaded):
     return lines
 
 
+def _bystander_by_bin(tmd, s006, state_weights, n_loaded, target_dir):
+    """
+    Per-AGI-bin bystander analysis.
+
+    For both targeted and untargeted variable-bin combinations,
+    computes cross-area aggregation distortion. Shows whether
+    dropped targets (variable-bin combos excluded from the recipe)
+    are still well-behaved or drifting.
+    """
+    lines = []
+
+    # Read one target file to identify what's targeted
+    first_area = next(iter(state_weights.keys()), None)
+    if first_area is None:
+        return lines
+    tgt_path = target_dir / f"{first_area}_targets.csv"
+    if not tgt_path.exists():
+        return lines
+    tgt_df = pd.read_csv(tgt_path, comment="#")
+
+    # Build set of targeted (varname, count, agilo, agihi, fstatus)
+    targeted_set = set()
+    for _, row in tgt_df.iterrows():
+        targeted_set.add(
+            (
+                row.varname,
+                int(row["count"]),
+                float(row.agilo),
+                float(row.agihi),
+                int(row.fstatus),
+            )
+        )
+
+    # Get AGI bins from target file (exclude the all-bins row)
+    agi_pairs = set()
+    for _, row in tgt_df.iterrows():
+        lo, hi = float(row.agilo), float(row.agihi)
+        if lo < -1e10 and hi > 1e10:
+            continue  # skip all-bins
+        agi_pairs.add((lo, hi))
+    agi_bins = sorted(agi_pairs)
+
+    if not agi_bins or "c00100" not in tmd.columns:
+        return lines
+
+    # Pre-compute AGI bin masks
+    agi_masks = {}
+    for lo, hi in agi_bins:
+        agi_masks[(lo, hi)] = (tmd["c00100"].values >= lo) & (
+            tmd["c00100"].values < hi
+        )
+
+    # Variables to check: recipe vars + key untargeted
+    # (label, varname, count_type)
+    check_vars = [
+        # Targeted amount variables
+        ("AGI", "c00100", 0),
+        ("Wages", "e00200", 0),
+        ("Interest", "e00300", 0),
+        ("Pensions tot", "e01500", 0),
+        ("Social Security", "e02400", 0),
+        ("SALT ded", "c18300", 0),
+        ("Partnership", "e26270", 0),
+        # Count targets
+        ("Returns", "c00100", 1),
+        ("Wage nz-count", "e00200", 2),
+        # Key untargeted (for bystander effect)
+        ("Income tax", "iitax", 0),
+        ("Cap gains net", "capgains_net", 0),
+        ("Sch C income", "e00900", 0),
+        ("IRA distrib", "e01400", 0),
+        ("Pensions txbl", "e01700", 0),
+        ("Sch E net", "e02000", 0),
+        ("Tax-exempt int", "e00400", 0),
+    ]
+
+    results = []
+    for label, varname, cnt in check_vars:
+        if varname not in tmd.columns:
+            continue
+        var_vals = tmd[varname].values
+
+        for lo, hi in agi_bins:
+            in_bin = agi_masks[(lo, hi)]
+
+            # Build the value array for this variable-bin-count combo
+            if cnt == 0:
+                vals = var_vals * in_bin
+            elif cnt == 1:
+                vals = in_bin.astype(float)
+            elif cnt == 2:
+                vals = ((var_vals != 0) & in_bin).astype(float)
+            else:
+                continue
+
+            # National total
+            nat = float((s006 * vals).sum())
+            if abs(nat) < 1:
+                continue
+
+            # Sum-of-areas total
+            sos = 0.0
+            for w in state_weights.values():
+                sos += float((w * vals).sum())
+            diff_pct = (sos / nat - 1) * 100
+
+            # Check if this combo is targeted (fstatus=0 for amounts/counts)
+            is_targeted = (varname, cnt, lo, hi, 0) in targeted_set
+
+            results.append(
+                {
+                    "label": label,
+                    "varname": varname,
+                    "cnt": cnt,
+                    "lo": lo,
+                    "hi": hi,
+                    "nat": nat,
+                    "sos": sos,
+                    "diff_pct": diff_pct,
+                    "targeted": is_targeted,
+                }
+            )
+
+    if not results:
+        return lines
+
+    # Sort by absolute distortion
+    results.sort(key=lambda x: -abs(x["diff_pct"]))
+
+    # Show top 30 by distortion
+    n_show = min(30, len(results))
+    n_total = len(results)
+    cnt_labels = {0: "amt", 1: "returns", 2: "nz-count"}
+    lines.append(f"PER-BIN DISTORTION ANALYSIS ({n_loaded} areas):")
+    lines.append(
+        f"  T = targeted, U = untargeted."
+        f" Sorted by |distortion|,"
+        f" top {n_show} out of {n_total} combinations."
+    )
+    col_hdr = (
+        f"  {'':>1} {'Variable':<18} {'Type':<10}"
+        f" {'AGI Bin':<16}"
+        f" {'National':>14} {'Sum-Areas':>14}"
+        f" {'Diff':>14} {'Diff%':>8}"
+    )
+    lines.append(col_hdr)
+    lines.append("  " + "-" * (len(col_hdr) - 2))
+
+    for r in results[:n_show]:
+        mark = "T" if r["targeted"] else "U"
+        ct_label = cnt_labels.get(r["cnt"], f"cnt{r['cnt']}")
+        bin_label = _fmt_agi_bin(r["lo"], r["hi"])
+        flag = " ***" if abs(r["diff_pct"]) > 5 else ""
+        diff = r["sos"] - r["nat"]
+
+        if r["cnt"] == 0:
+            nat_s = f"${r['nat'] / 1e9:.2f}B"
+            sos_s = f"${r['sos'] / 1e9:.2f}B"
+            diff_s = f"${diff / 1e9:+.2f}B"
+        else:
+            nat_s = f"{r['nat']:,.0f}"
+            sos_s = f"{r['sos']:,.0f}"
+            diff_s = f"{diff:+,.0f}"
+
+        lines.append(
+            f"  {mark} {r['label']:<18} {ct_label:<10}"
+            f" {bin_label:<16}"
+            f" {nat_s:>14} {sos_s:>14}"
+            f" {diff_s:>14}"
+            f" {r['diff_pct']:>+7.2f}%{flag}"
+        )
+
+    # Summary
+    untargeted = [r for r in results if not r["targeted"]]
+    targeted_results = [r for r in results if r["targeted"]]
+    lines.append("")
+    lines.append(
+        f"  Targeted: {len(targeted_results)}"
+        f"   Untargeted: {len(untargeted)}"
+    )
+    if untargeted:
+        worst_d = max(abs(r["diff_pct"]) for r in untargeted)
+        mean_d = np.mean([abs(r["diff_pct"]) for r in untargeted])
+        n_bad = sum(1 for r in untargeted if abs(r["diff_pct"]) > 5)
+        lines.append(
+            f"  Untargeted distortion:"
+            f" mean={mean_d:.2f}%, worst={worst_d:.2f}%"
+        )
+        if n_bad:
+            lines.append(
+                f"  *** = {n_bad} untargeted combos" f" with >5% distortion"
+            )
+        else:
+            lines.append("  All untargeted combos within 5% distortion.")
+    if targeted_results:
+        worst_t = max(abs(r["diff_pct"]) for r in targeted_results)
+        mean_t = np.mean([abs(r["diff_pct"]) for r in targeted_results])
+        lines.append(
+            f"  Targeted-bin distortion:"
+            f" mean={mean_t:.2f}%, worst={worst_t:.2f}%"
+        )
+    lines.append("")
+
+    return lines
+
+
 def main():
     parser = argparse.ArgumentParser(
-        description="Cross-state quality summary report",
+        description="Cross-area quality summary report",
     )
     parser.add_argument(
         "--scope",
         default=None,
-        help="Comma-separated state codes (default: all states)",
+        help=(
+            "'states', 'cds', or comma-separated area codes"
+            " (default: all states)"
+        ),
+    )
+    parser.add_argument(
+        "--output",
+        "-o",
+        nargs="?",
+        const="auto",
+        default=None,
+        help=(
+            "Save report to file. No argument = auto-generate"
+            " filename in weight directory. Or specify a path."
+        ),
     )
     args = parser.parse_args()
 
     areas = None
-    if args.scope and args.scope.lower() != "states":
-        areas = [s.strip().upper() for s in args.scope.split(",")]
+    weight_dir = None
+    target_dir = None
+    scope_label = "states"
+    if args.scope:
+        scope_lower = args.scope.lower().strip()
+        if scope_lower == "cds":
+            weight_dir = CD_WEIGHT_DIR
+            target_dir = CD_TARGET_DIR
+            scope_label = "cds"
+        elif scope_lower == "states":
+            weight_dir = STATE_WEIGHT_DIR
+            target_dir = STATE_TARGET_DIR
+            scope_label = "states"
+        else:
+            codes = [s.strip().upper() for s in args.scope.split(",")]
+            # Detect CDs vs states by code length
+            if codes and len(codes[0]) > 2:
+                weight_dir = CD_WEIGHT_DIR
+                target_dir = CD_TARGET_DIR
+                scope_label = f"cds ({len(codes)} selected)"
+            else:
+                scope_label = f"states ({len(codes)} selected)"
+            areas = codes
 
-    report = generate_report(areas)
+    if weight_dir is None:
+        weight_dir = STATE_WEIGHT_DIR
+        target_dir = STATE_TARGET_DIR
+
+    report = generate_report(
+        areas,
+        weight_dir=weight_dir,
+        target_dir=target_dir,
+        scope_label=scope_label,
+    )
     print(report)
 
+    # Save to file
+    if args.output is not None:
+        if args.output == "auto":
+            output_path = weight_dir / "quality_report.txt"
+        else:
+            output_path = Path(args.output)
+        output_path.write_text(report, encoding="utf-8")
+        print(f"\nReport saved to: {output_path}")
+
 
 if __name__ == "__main__":
     main()