"""Demo: recovering high-frequency structure on the sphere with bispectrum constraints.

Reconstructed from a whitespace-mangled git patch (PATCH 1/6,
examples/so3_deblur_demo.py). The same patch also renamed the pyproject
optional-dependency group ``plotting`` to ``examples``.
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from torch_harmonics import InverseRealSHT

from bispectrum import SO3onS2


def run_deblurring_demo():
    """
    Demo: Recovering high-frequency structure using bispectrum constraints.

    Scenario: We observe a blurry signal and want to reconstruct the sharp original.
    - Model A: Only regularization toward input (no structural prior) - stays blurry
    - Model B: Regularization + bispectrum constraint - recovers high-frequency structure

    The bispectrum acts as a "structural fingerprint" that encodes phase relationships
    between different frequency components, allowing recovery of detail that pure
    smoothness-based methods cannot achieve.

    Returns:
        Tuple of (truth, input, result_A, result_B), each a
        (1, LMAX+1, LMAX+1) complex64 tensor of SH coefficients.
    """
    # --- Configuration ---
    LMAX = 5  # Resolution (keep low for speed)
    LAMBDA_REG = 0.01  # Regularization toward input (prevents divergence)
    LAMBDA_BSP = 10.0  # Strength of bispectrum constraint
    STEPS = 400

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bsp_module = SO3onS2(lmax=LMAX).to(device)

    # --- 1. Generate "Ground Truth" (Sharp Signal) ---
    # Power law decay l^-2 with meaningful high-frequency content
    torch.manual_seed(42)  # Reproducibility
    truth_coeffs = torch.randn(1, LMAX + 1, LMAX + 1, dtype=torch.complex64, device=device)
    for l in range(LMAX + 1):
        truth_coeffs[:, l, :] *= 1.0 / (1.0 + l**2)

    # Pre-compute the "Structural Fingerprint" (rotation-invariant bispectrum)
    # In practice, this could come from physical constraints or a reference signal
    with torch.no_grad():
        truth_invariants = bsp_module(truth_coeffs)

    # --- 2. Generate "Input" (Blurry Observation) ---
    # Heavily dampen high frequencies to simulate degraded observation
    input_coeffs = truth_coeffs.clone().detach()
    for l in range(3, LMAX + 1):
        input_coeffs[:, l, :] *= 0.1  # Kill high frequencies

    # --- 3. Optimization: Regularization Only vs. Regularization + Bispectrum ---
    # Neither model has access to ground truth pixels!

    # Model A: Only stays close to input (no structural guidance)
    coeffs_A = input_coeffs.clone().detach().requires_grad_(True)
    opt_A = optim.Adam([coeffs_A], lr=0.01)

    # Model B: Stays close to input + matches bispectrum structure
    coeffs_B = input_coeffs.clone().detach().requires_grad_(True)
    opt_B = optim.Adam([coeffs_B], lr=0.01)

    print('Training starts...')
    print('Model A: Regularization only (no structural prior)')
    print('Model B: Regularization + Bispectrum constraint')
    print('-' * 50)

    for step in range(STEPS):
        # --- Model A: Only regularization toward input ---
        opt_A.zero_grad()
        # Just penalize deviation from input - will stay blurry
        loss_reg_A = torch.mean(torch.abs(coeffs_A - input_coeffs) ** 2)
        loss_A = loss_reg_A
        loss_A.backward()
        opt_A.step()

        # --- Model B: Regularization + Bispectrum ---
        opt_B.zero_grad()

        # 1. Regularization: don't deviate too far from input
        loss_reg_B = torch.mean(torch.abs(coeffs_B - input_coeffs) ** 2)

        # 2. Bispectrum constraint: match the structural fingerprint
        pred_invariants = bsp_module(coeffs_B)
        loss_bsp = torch.mean(torch.abs(pred_invariants - truth_invariants) ** 2)

        loss_B = LAMBDA_REG * loss_reg_B + LAMBDA_BSP * loss_bsp
        loss_B.backward()
        opt_B.step()

        if step % 100 == 0:
            print(
                f'Step {step}: Loss A={loss_A.item():.6f} | Loss B={loss_B.item():.6f} (bsp={loss_bsp.item():.6f})'
            )

    print('-' * 50)
    print('Done!')

    return truth_coeffs, input_coeffs, coeffs_A.detach(), coeffs_B.detach()


# --- Spherical Rendering ---
def sh_to_spatial(coeffs: torch.Tensor, nlat: int = 64, nlon: int = 128) -> np.ndarray:
    """Convert SH coefficients to a spatial grid on the sphere using torch-harmonics.

    Args:
        coeffs: (1, lmax+1, mmax+1) complex tensor with SH coefficients
        nlat: number of latitude points
        nlon: number of longitude points

    Returns:
        (nlat, nlon) real array of the function on the sphere
    """
    # NOTE(review): torch-harmonics' lmax/mmax arguments count modes, so the
    # raw shape (lmax+1 in math notation) is passed directly — confirm against
    # the installed torch_harmonics version.
    lmax = coeffs.shape[1]
    mmax = coeffs.shape[2]

    # Create inverse SHT (cheap at this resolution, so rebuilt per call)
    isht = InverseRealSHT(nlat, nlon, lmax=lmax, mmax=mmax, grid='equiangular', norm='ortho')
    isht = isht.to(coeffs.device)

    # Transform to spatial domain
    spatial = isht(coeffs.to(torch.complex64)).squeeze(0).detach().cpu().numpy()

    return spatial


# --- Analysis Helper ---
def analyze_results(truth, blurry, res_reg_only, res_bispectrum):
    """
    Combined visualization: spatial images + power spectrum.

    Renders the four coefficient sets on an equiangular grid, plots them with a
    shared color scale, adds a per-degree power spectrum comparison, and saves
    everything to ``deblur_results.png``.
    """

    def get_power_spectrum(coeffs):
        # Power per degree l: sum over m of |a_lm|^2
        return torch.sum(torch.abs(coeffs) ** 2, dim=-1).squeeze().detach().cpu().numpy()

    # Render all signals to spatial domain
    print('Rendering spherical harmonics to spatial domain...')
    img_truth = sh_to_spatial(truth)
    img_blurry = sh_to_spatial(blurry)
    img_reg = sh_to_spatial(res_reg_only)
    img_bsp = sh_to_spatial(res_bispectrum)

    # Compute power spectra
    ps_truth = get_power_spectrum(truth)
    ps_blurry = get_power_spectrum(blurry)
    ps_reg = get_power_spectrum(res_reg_only)
    ps_bsp = get_power_spectrum(res_bispectrum)

    # Common colormap range for fair comparison
    vmin = min(img_truth.min(), img_blurry.min(), img_reg.min(), img_bsp.min())
    vmax = max(img_truth.max(), img_blurry.max(), img_reg.max(), img_bsp.max())

    # Create combined figure
    fig = plt.figure(figsize=(14, 10))

    # Top row: spatial images
    ax1 = fig.add_subplot(2, 3, 1)
    im1 = ax1.imshow(
        img_truth, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax1.set_title('Ground Truth (Sharp)', fontweight='bold')
    ax1.set_xlabel('Longitude')
    ax1.set_ylabel('Latitude')

    ax2 = fig.add_subplot(2, 3, 2)
    ax2.imshow(
        img_blurry, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax2.set_title('Input (Blurry)', fontweight='bold')
    ax2.set_xlabel('Longitude')

    ax3 = fig.add_subplot(2, 3, 4)
    ax3.imshow(
        img_reg, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax3.set_title('Regularization Only', fontweight='bold')
    ax3.set_xlabel('Longitude')
    ax3.set_ylabel('Latitude')

    ax4 = fig.add_subplot(2, 3, 5)
    ax4.imshow(
        img_bsp, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax4.set_title('Bispectrum Constraint', fontweight='bold', color='green')
    ax4.set_xlabel('Longitude')

    # Add colorbar
    cbar_ax = fig.add_axes([0.02, 0.15, 0.01, 0.7])
    fig.colorbar(im1, cax=cbar_ax)

    # Right column: power spectrum
    ax5 = fig.add_subplot(1, 3, 3)
    ax5.plot(ps_truth, 'k-', linewidth=2, marker='o', label='Ground Truth')
    ax5.plot(ps_blurry, 'r--', linewidth=2, marker='s', label='Input (Blurry)')
    ax5.plot(ps_reg, 'b-', linewidth=1.5, marker='^', label='Reg. Only')
    ax5.plot(ps_bsp, 'g-', linewidth=2, marker='d', label='Bispectrum')
    ax5.set_yscale('log')
    ax5.set_xlabel('Degree l')
    ax5.set_ylabel('Power')
    ax5.set_title('Power Spectrum')
    ax5.legend(loc='upper right', fontsize=9)
    ax5.grid(True, alpha=0.3)

    plt.suptitle(
        'High-Frequency Recovery via Bispectrum Constraint', fontsize=14, fontweight='bold'
    )
    plt.tight_layout(rect=[0.04, 0, 1, 0.96])

    plt.savefig('deblur_results.png', dpi=150, bbox_inches='tight')
    plt.close()
    print('Saved plot to deblur_results.png')


if __name__ == '__main__':
    results = run_deblurring_demo()
    analyze_results(*results)
def power_spectrum(coeffs: torch.Tensor) -> torch.Tensor:
    """Compute power spectrum from SH coefficients.

    Args:
        coeffs: [B, L, M] complex tensor

    Returns:
        [B, L] tensor of power per degree
    """
    return torch.sum(torch.abs(coeffs) ** 2, dim=-1)


def run_deblurring_demo():
    """
    Demo: Recovering high-frequency structure using bispectrum constraints.

    Scenario: We observe a blurry signal and want to reconstruct the sharp original.
    - Model A: Only regularization toward input (no structural prior) - stays blurry
    - Model B: Regularization + bispectrum constraint - recovers high-frequency structure
    - Model C: Regularization + spectral constraint - matches power but NOT structure

    The bispectrum acts as a "structural fingerprint" that encodes phase relationships
    between different frequency components, allowing recovery of detail that pure
    smoothness-based methods cannot achieve. Model C demonstrates that matching
    power spectrum alone is insufficient - you need phase coherence from bispectrum.

    Returns:
        Tuple of (truth, input, result_A, result_B, result_C), each a
        (1, LMAX+1, LMAX+1) complex64 tensor of SH coefficients.
    """
    # --- Configuration ---
    LMAX = 5  # Resolution (keep low for speed)
    LAMBDA_REG = 0.01  # Regularization toward input (prevents divergence)
    LAMBDA_BSP = 10.0  # Strength of bispectrum constraint
    LAMBDA_SPEC = 10.0  # Strength of spectral (power spectrum) constraint
    STEPS = 400

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bsp_module = SO3onS2(lmax=LMAX).to(device)

    # --- 1. Generate "Ground Truth" (Sharp Signal) ---
    # Power law decay l^-2 with meaningful high-frequency content
    torch.manual_seed(42)  # Reproducibility
    truth_coeffs = torch.randn(1, LMAX + 1, LMAX + 1, dtype=torch.complex64, device=device)
    for l in range(LMAX + 1):
        truth_coeffs[:, l, :] *= 1.0 / (1.0 + l**2)

    # Pre-compute the "Structural Fingerprint" (rotation-invariant bispectrum)
    # In practice, this could come from physical constraints or a reference signal
    with torch.no_grad():
        truth_invariants = bsp_module(truth_coeffs)
        truth_power = power_spectrum(truth_coeffs)

    # --- 2. Generate "Input" (Blurry Observation) ---
    # Heavily dampen high frequencies to simulate degraded observation
    input_coeffs = truth_coeffs.clone().detach()
    for l in range(3, LMAX + 1):
        input_coeffs[:, l, :] *= 0.1  # Kill high frequencies

    # --- 3. Optimization: Compare three approaches ---
    # None of the models has access to ground truth pixels!

    # Model A: Only stays close to input (no structural guidance)
    coeffs_A = input_coeffs.clone().detach().requires_grad_(True)
    opt_A = optim.Adam([coeffs_A], lr=0.01)

    # Model B: Stays close to input + matches bispectrum structure
    coeffs_B = input_coeffs.clone().detach().requires_grad_(True)
    opt_B = optim.Adam([coeffs_B], lr=0.01)

    # Model C: Stays close to input + matches power spectrum (spectral loss)
    coeffs_C = input_coeffs.clone().detach().requires_grad_(True)
    opt_C = optim.Adam([coeffs_C], lr=0.01)

    print('Training starts...')
    print('Model A: Regularization only (no structural prior)')
    print('Model B: Regularization + Bispectrum constraint')
    print('Model C: Regularization + Spectral (power spectrum) constraint')
    print('-' * 70)

    for step in range(STEPS):
        # --- Model A: Only regularization toward input ---
        opt_A.zero_grad()
        # Just penalize deviation from input - will stay blurry
        loss_reg_A = torch.mean(torch.abs(coeffs_A - input_coeffs) ** 2)
        loss_A = loss_reg_A
        loss_A.backward()
        opt_A.step()

        # --- Model B: Regularization + Bispectrum ---
        opt_B.zero_grad()

        # 1. Regularization: don't deviate too far from input
        loss_reg_B = torch.mean(torch.abs(coeffs_B - input_coeffs) ** 2)

        # 2. Bispectrum constraint: match the structural fingerprint
        pred_invariants = bsp_module(coeffs_B)
        loss_bsp = torch.mean(torch.abs(pred_invariants - truth_invariants) ** 2)

        loss_B = LAMBDA_REG * loss_reg_B + LAMBDA_BSP * loss_bsp
        loss_B.backward()
        opt_B.step()

        # --- Model C: Regularization + Spectral (power spectrum) ---
        opt_C.zero_grad()

        loss_reg_C = torch.mean(torch.abs(coeffs_C - input_coeffs) ** 2)

        pred_power = power_spectrum(coeffs_C)
        loss_spec = torch.mean((pred_power - truth_power) ** 2)

        loss_C = LAMBDA_REG * loss_reg_C + LAMBDA_SPEC * loss_spec
        loss_C.backward()
        opt_C.step()

        if step % 100 == 0:
            print(
                f'Step {step}: '
                f'Loss A={loss_A.item():.6f} | '
                f'Loss B={loss_B.item():.6f} (bsp={loss_bsp.item():.6f}) | '
                f'Loss C={loss_C.item():.6f} (spec={loss_spec.item():.6f})'
            )

    print('-' * 70)
    print('Done!')

    return truth_coeffs, input_coeffs, coeffs_A.detach(), coeffs_B.detach(), coeffs_C.detach()


# --- Spherical Rendering ---
def sh_to_spatial(coeffs: torch.Tensor, nlat: int = 64, nlon: int = 128) -> np.ndarray:
    """Convert SH coefficients to a spatial grid on the sphere using torch-harmonics.

    Args:
        coeffs: (1, lmax+1, mmax+1) complex tensor with SH coefficients
        nlat: number of latitude points
        nlon: number of longitude points

    Returns:
        (nlat, nlon) real array of the function on the sphere
    """
    lmax = coeffs.shape[1]
    mmax = coeffs.shape[2]

    # Create inverse SHT
    isht = InverseRealSHT(nlat, nlon, lmax=lmax, mmax=mmax, grid='equiangular', norm='ortho')
    isht = isht.to(coeffs.device)

    # Transform to spatial domain
    spatial = isht(coeffs.to(torch.complex64)).squeeze(0).detach().cpu().numpy()

    return spatial


# --- Analysis Helper ---
def analyze_results(
    truth: torch.Tensor,
    blurry: torch.Tensor,
    res_reg_only: torch.Tensor,
    res_bispectrum: torch.Tensor,
    res_spectral: torch.Tensor,
) -> None:
    """
    Combined visualization: spatial images + power spectrum + numerical metrics.

    Prints power-spectrum and bispectrum MSE tables against ground truth, then
    saves a combined figure (spatial maps, spectra, metric table, explanatory
    panels) to ``deblur_results.png``.
    """
    device = truth.device
    lmax = truth.shape[1] - 1

    # Create bispectrum module for metric computation
    bsp_module = SO3onS2(lmax=lmax).to(device)

    def get_power_spectrum(coeffs: torch.Tensor) -> np.ndarray:
        # Power per degree l as a plottable numpy array
        return torch.sum(torch.abs(coeffs) ** 2, dim=-1).squeeze().detach().cpu().numpy()

    def power_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float:
        ps1 = power_spectrum(coeffs)
        ps2 = power_spectrum(ref)
        return torch.mean((ps1 - ps2) ** 2).item()

    def bispec_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float:
        with torch.no_grad():
            inv1 = bsp_module(coeffs)
            inv2 = bsp_module(ref)
        return torch.mean(torch.abs(inv1 - inv2) ** 2).item()

    # --- Compute Numerical Metrics ---
    metrics: dict[str, dict[str, float]] = {}
    models = [
        ('Input (Blurry)', blurry),
        ('Reg. Only', res_reg_only),
        ('Spectral', res_spectral),
        ('Bispectrum', res_bispectrum),
    ]

    for name, coeffs in models:
        metrics[name] = {
            'power_mse': power_mse(coeffs, truth),
            'bispec_mse': bispec_mse(coeffs, truth),
        }

    # Print metrics to console
    print('\n' + '=' * 70)
    print('NUMERICAL METRICS (MSE vs Ground Truth)')
    print('=' * 70)
    print(f'{"Model":<25} {"Power Spectrum MSE":>20} {"Bispectrum MSE":>20}')
    print('-' * 70)
    for name, m in metrics.items():
        print(f'{name:<25} {m["power_mse"]:>20.6e} {m["bispec_mse"]:>20.6e}')
    print('=' * 70)

    # Render all signals to spatial domain
    print('Rendering spherical harmonics to spatial domain...')
    img_truth = sh_to_spatial(truth)
    img_blurry = sh_to_spatial(blurry)
    img_reg = sh_to_spatial(res_reg_only)
    img_bsp = sh_to_spatial(res_bispectrum)
    img_spec = sh_to_spatial(res_spectral)

    # Compute power spectra for plotting
    ps_truth = get_power_spectrum(truth)
    ps_blurry = get_power_spectrum(blurry)
    ps_reg = get_power_spectrum(res_reg_only)
    ps_bsp = get_power_spectrum(res_bispectrum)
    ps_spec = get_power_spectrum(res_spectral)

    # Common colormap range for fair comparison
    all_imgs = [img_truth, img_blurry, img_reg, img_bsp, img_spec]
    vmin = min(img.min() for img in all_imgs)
    vmax = max(img.max() for img in all_imgs)

    # Create combined figure: 3 rows - images, power spectrum + metrics, explanation
    fig = plt.figure(figsize=(18, 14))

    # Use GridSpec for flexible layout
    gs = fig.add_gridspec(3, 3, height_ratios=[1, 1, 0.6], hspace=0.3, wspace=0.25)

    # --- Row 1: Ground Truth, Input (Blurry), Regularization Only ---
    ax1 = fig.add_subplot(gs[0, 0])
    im1 = ax1.imshow(
        img_truth, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax1.set_title('Ground Truth (Sharp)', fontweight='bold', fontsize=11)
    ax1.set_xlabel('Longitude')
    ax1.set_ylabel('Latitude')

    ax2 = fig.add_subplot(gs[0, 1])
    ax2.imshow(
        img_blurry, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax2.set_title('Input (Blurry)', fontweight='bold', fontsize=11)
    ax2.set_xlabel('Longitude')

    ax3 = fig.add_subplot(gs[0, 2])
    ax3.imshow(
        img_reg, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax3.set_title('Regularization Only', fontweight='bold', fontsize=11)
    ax3.set_xlabel('Longitude')

    # --- Row 2: Spectral Constraint, Bispectrum Constraint, Power Spectrum plot ---
    ax4 = fig.add_subplot(gs[1, 0])
    ax4.imshow(
        img_spec, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax4.set_title('Spectral Constraint', fontweight='bold', fontsize=11, color='#CC6600')
    ax4.set_xlabel('Longitude')
    ax4.set_ylabel('Latitude')

    ax5 = fig.add_subplot(gs[1, 1])
    ax5.imshow(
        img_bsp, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
    )
    ax5.set_title('Bispectrum Constraint', fontweight='bold', fontsize=11, color='green')
    ax5.set_xlabel('Longitude')

    # Power spectrum comparison
    ax6 = fig.add_subplot(gs[1, 2])
    ax6.plot(ps_truth, 'k-', linewidth=2, marker='o', markersize=8, label='Ground Truth')
    ax6.plot(ps_blurry, 'r--', linewidth=2, marker='s', markersize=7, label='Input (Blurry)')
    ax6.plot(ps_reg, 'b-', linewidth=1.5, marker='^', markersize=6, label='Reg. Only')
    ax6.plot(
        ps_spec,
        color='#CC6600',
        linestyle='-',
        linewidth=2,
        marker='x',
        markersize=8,
        label='Spectral',
    )
    ax6.plot(ps_bsp, 'g-', linewidth=2, marker='d', markersize=7, label='Bispectrum')
    ax6.set_yscale('log')
    ax6.set_xlabel('Degree $\\ell$', fontsize=10)
    ax6.set_ylabel('Power $\\sum_m |a_{\\ell m}|^2$', fontsize=10)
    ax6.set_title('Power Spectrum Comparison', fontweight='bold', fontsize=11)
    ax6.legend(loc='upper right', fontsize=9)
    ax6.grid(True, alpha=0.3)

    # --- Row 3: Metrics table and explanation text ---
    # Left panel: Metrics table
    ax_table = fig.add_subplot(gs[2, 0])
    ax_table.axis('off')

    # Create table data
    table_data = [
        ['Model', 'Power MSE', 'Bispec MSE'],
        [
            'Input (Blurry)',
            f'{metrics["Input (Blurry)"]["power_mse"]:.2e}',
            f'{metrics["Input (Blurry)"]["bispec_mse"]:.2e}',
        ],
        [
            'Reg. Only',
            f'{metrics["Reg. Only"]["power_mse"]:.2e}',
            f'{metrics["Reg. Only"]["bispec_mse"]:.2e}',
        ],
        [
            'Spectral',
            f'{metrics["Spectral"]["power_mse"]:.2e}',
            f'{metrics["Spectral"]["bispec_mse"]:.2e}',
        ],
        [
            'Bispectrum',
            f'{metrics["Bispectrum"]["power_mse"]:.2e}',
            f'{metrics["Bispectrum"]["bispec_mse"]:.2e}',
        ],
    ]

    # Color cells based on values (green=good, red=bad)
    cell_colors = [['lightgray'] * 3]  # Header
    for name in ['Input (Blurry)', 'Reg. Only', 'Spectral', 'Bispectrum']:
        row_colors = ['white']  # Name column
        # Power MSE color
        if metrics[name]['power_mse'] < 1e-6:
            row_colors.append('#90EE90')  # Light green
        elif metrics[name]['power_mse'] < 1e-4:
            row_colors.append('#FFFFE0')  # Light yellow
        else:
            row_colors.append('#FFB6C1')  # Light red
        # Bispectrum MSE color
        if metrics[name]['bispec_mse'] < 1e-6:
            row_colors.append('#90EE90')  # Light green
        elif metrics[name]['bispec_mse'] < 1e-4:
            row_colors.append('#FFFFE0')  # Light yellow
        else:
            row_colors.append('#FFB6C1')  # Light red
        cell_colors.append(row_colors)

    table = ax_table.table(
        cellText=table_data,
        cellColours=cell_colors,
        loc='center',
        cellLoc='center',
        colWidths=[0.35, 0.32, 0.32],
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.8)
    ax_table.set_title('MSE vs Ground Truth', fontweight='bold', fontsize=11, pad=10)

    # Middle panel: Key insight box
    ax_insight = fig.add_subplot(gs[2, 1])
    ax_insight.axis('off')

    insight_text = (
        'KEY INSIGHT\n'
        '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n'
        'Spectral constraint matches power\n'
        'spectrum perfectly (MSE ≈ 10⁻⁹)\n'
        'but spatial structure is WRONG!\n\n'
        'Why? Power spectrum = amplitudes only.\n'
        'It discards phase information.\n\n'
        'Bispectrum encodes phase relationships\n'
        'between frequencies → recovers structure.'
    )
    ax_insight.text(
        0.5,
        0.5,
        insight_text,
        transform=ax_insight.transAxes,
        fontsize=10,
        verticalalignment='center',
        horizontalalignment='center',
        fontfamily='monospace',
        bbox={
            'boxstyle': 'round,pad=0.5',
            'facecolor': '#E8F4E8',
            'edgecolor': 'green',
            'linewidth': 2,
        },
    )

    # Right panel: Mathematical explanation
    ax_math = fig.add_subplot(gs[2, 2])
    ax_math.axis('off')

    math_text = (
        'MATHEMATICAL INTERPRETATION\n'
        '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n'
        'Power Spectrum:\n'
        ' $P_\\ell = \\sum_m |a_{\\ell m}|^2$\n'
        ' → Rotation invariant\n'
        ' → Loses phase info\n\n'
        'Bispectrum:\n'
        ' $B_{\\ell_1 \\ell_2 \\ell_3} = '
        '\\sum_{m_i} C^{\\ell_3}_{\\ell_1 \\ell_2} a_{\\ell_1 m_1} a_{\\ell_2 m_2} a^*_{\\ell_3 m_3}$\n'
        ' → Rotation invariant\n'
        ' → Preserves phase coherence'
    )
    ax_math.text(
        0.5,
        0.5,
        math_text,
        transform=ax_math.transAxes,
        fontsize=9,
        verticalalignment='center',
        horizontalalignment='center',
        fontfamily='monospace',
        bbox={
            'boxstyle': 'round,pad=0.5',
            'facecolor': '#F0F0FF',
            'edgecolor': 'blue',
            'linewidth': 2,
        },
    )

    # Add colorbar
    cbar_ax = fig.add_axes([0.02, 0.35, 0.012, 0.55])
    fig.colorbar(im1, cax=cbar_ax)

    plt.suptitle(
        'Power Spectrum vs Bispectrum: Why Phase Coherence Matters for Signal Recovery',
        fontsize=14,
        fontweight='bold',
        y=0.98,
    )

    plt.savefig('deblur_results.png', dpi=150, bbox_inches='tight')
    plt.close()
    print('Saved plot to deblur_results.png')


if __name__ == '__main__':
    results = run_deblurring_demo()
    analyze_results(*results)
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from torch_harmonics import InverseRealSHT

from bispectrum import SO3onS2


def power_spectrum(coeffs: torch.Tensor) -> torch.Tensor:
    """Compute power spectrum from SH coefficients.

    Args:
        coeffs: [B, L, M] complex tensor

    Returns:
        [B, L] tensor of power per degree
    """
    return torch.sum(torch.abs(coeffs) ** 2, dim=-1)


def correlation_per_ell(coeffs: torch.Tensor, ref: torch.Tensor, eps: float = 1e-12) -> np.ndarray:
    """Compute correlation coefficient C_ℓ between recovered and ground truth coefficients.

    The correlation coefficient measures phase alignment at each degree ℓ:
        C_ℓ = Re(∑_m â_ℓm · a*_ℓm) / √(∑_m |â_ℓm|² · ∑_m |a_ℓm|²)

    Args:
        coeffs: [B, L, M] recovered complex coefficients
        ref: [B, L, M] ground truth complex coefficients
        eps: small constant to avoid division by zero

    Returns:
        [L] array of correlation coefficients per degree ℓ
        C_ℓ = 1.0: perfect phase alignment
        C_ℓ = 0.0: random/uncorrelated phases (even if power spectrum matches!)
    """
    # Cross-correlation: ∑_m â_ℓm · a*_ℓm
    cross = torch.sum(coeffs * ref.conj(), dim=-1)  # [B, L]

    # Power of each: ∑_m |a_ℓm|²
    power_coeffs = torch.sum(torch.abs(coeffs) ** 2, dim=-1)  # [B, L]
    power_ref = torch.sum(torch.abs(ref) ** 2, dim=-1)  # [B, L]

    # Normalize
    denom = torch.sqrt(power_coeffs * power_ref + eps)
    corr = (cross.real / denom).squeeze(0)  # [L]

    return corr.detach().cpu().numpy()


def run_deblurring_demo():
    """
    Demo: Recovering high-frequency structure using bispectrum constraints.

    Scenario: We observe a blurry signal and want to reconstruct the sharp original.
    - Model A: Only regularization toward input (no structural prior) - stays blurry
    - Model B: Regularization + bispectrum constraint - recovers high-frequency structure
    - Model C: Regularization + spectral constraint - matches power but NOT phases

    The bispectrum acts as a "structural fingerprint" that encodes phase relationships
    between different frequency components. Model C demonstrates that matching
    power spectrum alone is insufficient - you need phase coherence from bispectrum.

    Returns:
        Tuple of (truth, input, result_A, result_B, result_C), each a
        (1, LMAX+1, LMAX+1) complex64 tensor of SH coefficients.
    """
    # --- Configuration ---
    LMAX = 5  # Resolution (keep low for speed)
    LAMBDA_REG = 0.01  # Regularization toward input (prevents divergence)
    LAMBDA_BSP = 10.0  # Strength of bispectrum constraint
    LAMBDA_SPEC = 10.0  # Strength of spectral (power spectrum) constraint
    STEPS = 400

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bsp_module = SO3onS2(lmax=LMAX).to(device)

    # --- 1. Generate "Ground Truth" (Sharp Signal) ---
    # Power law decay l^-2 with meaningful high-frequency content
    torch.manual_seed(42)  # Reproducibility
    truth_coeffs = torch.randn(1, LMAX + 1, LMAX + 1, dtype=torch.complex64, device=device)
    for l in range(LMAX + 1):
        truth_coeffs[:, l, :] *= 1.0 / (1.0 + l**2)

    # Pre-compute targets
    with torch.no_grad():
        truth_invariants = bsp_module(truth_coeffs)
        truth_power = power_spectrum(truth_coeffs)

    # --- 2. Generate "Input" (Corrupted Observation) ---
    # For ℓ < 3: keep truth coefficients (low frequencies intact)
    # For ℓ >= 3: keep correct AMPLITUDE but RANDOMIZE PHASES
    # This simulates a measurement where we know the power spectrum but lost phase info
    input_coeffs = truth_coeffs.clone().detach()
    for l in range(3, LMAX + 1):
        # Get the amplitude (magnitude) of each coefficient
        amplitude = torch.abs(truth_coeffs[:, l, :])
        # Generate random phases
        random_phase = torch.exp(2j * np.pi * torch.rand_like(amplitude, dtype=torch.float32)).to(
            device
        )
        # Create new coefficients with correct amplitude but random phase
        input_coeffs[:, l, :] = amplitude * random_phase

    # --- 3. Optimization: Compare three approaches ---
    # None of the models has access to ground truth pixels!

    # Model A: Only stays close to input (no structural guidance)
    coeffs_A = input_coeffs.clone().detach().requires_grad_(True)
    opt_A = optim.Adam([coeffs_A], lr=0.01)

    # Model B: Stays close to input + matches bispectrum structure
    coeffs_B = input_coeffs.clone().detach().requires_grad_(True)
    opt_B = optim.Adam([coeffs_B], lr=0.01)

    # Model C: Stays close to input + matches power spectrum (spectral loss)
    coeffs_C = input_coeffs.clone().detach().requires_grad_(True)
    opt_C = optim.Adam([coeffs_C], lr=0.01)

    print('Training starts...')
    print('Model A: Regularization only (no structural prior)')
    print('Model B: Regularization + Bispectrum constraint')
    print('Model C: Regularization + Spectral (power spectrum) constraint')
    print('-' * 70)

    for step in range(STEPS):
        # --- Model A: Only regularization toward input ---
        opt_A.zero_grad()
        loss_reg_A = torch.mean(torch.abs(coeffs_A - input_coeffs) ** 2)
        loss_A = loss_reg_A
        loss_A.backward()
        opt_A.step()

        # --- Model B: Regularization + Bispectrum ---
        opt_B.zero_grad()
        loss_reg_B = torch.mean(torch.abs(coeffs_B - input_coeffs) ** 2)
        pred_invariants = bsp_module(coeffs_B)
        loss_bsp = torch.mean(torch.abs(pred_invariants - truth_invariants) ** 2)
        loss_B = LAMBDA_REG * loss_reg_B + LAMBDA_BSP * loss_bsp
        loss_B.backward()
        opt_B.step()

        # --- Model C: Regularization + Spectral (power spectrum) ---
        opt_C.zero_grad()
        loss_reg_C = torch.mean(torch.abs(coeffs_C - input_coeffs) ** 2)
        pred_power = power_spectrum(coeffs_C)
        loss_spec = torch.mean((pred_power - truth_power) ** 2)
        loss_C = LAMBDA_REG * loss_reg_C + LAMBDA_SPEC * loss_spec
        loss_C.backward()
        opt_C.step()

        if step % 100 == 0:
            print(
                f'Step {step}: '
                f'Loss A={loss_A.item():.6f} | '
                f'Loss B={loss_B.item():.6f} (bsp={loss_bsp.item():.6f}) | '
                f'Loss C={loss_C.item():.6f} (spec={loss_spec.item():.6f})'
            )

    print('-' * 70)
    print('Done!')

    return truth_coeffs, input_coeffs, coeffs_A.detach(), coeffs_B.detach(), coeffs_C.detach()


# --- Spherical Rendering ---
def sh_to_spatial(coeffs: torch.Tensor, nlat: int = 64, nlon: int = 128) -> np.ndarray:
    """Convert SH coefficients to a spatial grid on the sphere using torch-harmonics.

    Args:
        coeffs: (1, lmax+1, mmax+1) complex tensor with SH coefficients
        nlat: number of latitude points
        nlon: number of longitude points

    Returns:
        (nlat, nlon) real array of the function on the sphere
    """
    lmax = coeffs.shape[1]
    mmax = coeffs.shape[2]

    # Create inverse SHT
    isht = InverseRealSHT(nlat, nlon, lmax=lmax, mmax=mmax, grid='equiangular', norm='ortho')
    isht = isht.to(coeffs.device)

    # Transform to spatial domain
    spatial = isht(coeffs.to(torch.complex64)).squeeze(0).detach().cpu().numpy()

    return spatial
+ - Bispectrum constraint should achieve C_ℓ → 1 (phase coherence recovered) + """ + + def get_power_spectrum(coeffs: torch.Tensor) -> np.ndarray: + return torch.sum(torch.abs(coeffs) ** 2, dim=-1).squeeze().detach().cpu().numpy() + + # --- Compute Correlation Coefficients C_ℓ --- + print('\nComputing correlation coefficients C_ℓ (phase alignment metric)...') + corr_blurry = correlation_per_ell(blurry, truth) + corr_reg = correlation_per_ell(res_reg_only, truth) + corr_bsp = correlation_per_ell(res_bispectrum, truth) + corr_spec = correlation_per_ell(res_spectral, truth) + + lmax = truth.shape[1] - 1 + ell_values = np.arange(lmax + 1) + + # Print correlation table + print('\n' + '=' * 60) + print('CORRELATION COEFFICIENT C_ℓ vs Ground Truth') + print('(C_ℓ = 1.0: perfect phase match, C_ℓ = 0: random phases)') + print('=' * 60) + print(f'{"ℓ":>3} | {"Blurry":>10} | {"Reg Only":>10} | {"Spectral":>10} | {"Bispectrum":>10}') + print('-' * 60) + for ell in ell_values: + print( + f'{ell:>3} | {corr_blurry[ell]:>10.4f} | {corr_reg[ell]:>10.4f} | ' + f'{corr_spec[ell]:>10.4f} | {corr_bsp[ell]:>10.4f}' + ) + print('=' * 60) + + # Mean correlation (excluding ℓ=0 which is always 1) + print('\nMean C_ℓ (ℓ > 0):') + print(f' Blurry: {np.mean(corr_blurry[1:]):.4f}') + print(f' Reg Only: {np.mean(corr_reg[1:]):.4f}') + print(f' Spectral: {np.mean(corr_spec[1:]):.4f}') + print(f' Bispectrum: {np.mean(corr_bsp[1:]):.4f}') + + # Render all signals to spatial domain + print('\nRendering spherical harmonics to spatial domain...') + img_truth = sh_to_spatial(truth) + img_blurry = sh_to_spatial(blurry) + img_reg = sh_to_spatial(res_reg_only) + img_bsp = sh_to_spatial(res_bispectrum) + img_spec = sh_to_spatial(res_spectral) + + # Compute power spectra for plotting + ps_truth = get_power_spectrum(truth) + ps_blurry = get_power_spectrum(blurry) + get_power_spectrum(res_reg_only) + ps_bsp = get_power_spectrum(res_bispectrum) + ps_spec = get_power_spectrum(res_spectral) + + # --- Compute 
MSE metrics for table ---
+    device = truth.device
+    bsp_module = SO3onS2(lmax=lmax).to(device)
+
+    def power_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float:
+        ps1 = power_spectrum(coeffs)
+        ps2 = power_spectrum(ref)
+        return torch.mean((ps1 - ps2) ** 2).item()
+
+    def bispec_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float:
+        with torch.no_grad():
+            inv1 = bsp_module(coeffs)
+            inv2 = bsp_module(ref)
+        return torch.mean(torch.abs(inv1 - inv2) ** 2).item()
+
+    # Compute metrics
+    metrics: dict[str, dict[str, float]] = {}
+    models_for_metrics = [
+        ('Input', blurry),
+        ('Reg. Only', res_reg_only),
+        ('Spectral', res_spectral),
+        ('Bispectrum', res_bispectrum),
+    ]
+    for name, coeffs in models_for_metrics:
+        metrics[name] = {
+            'power_mse': power_mse(coeffs, truth),
+            'bispec_mse': bispec_mse(coeffs, truth),
+            'mean_corr': float(np.mean(correlation_per_ell(coeffs, truth)[1:])),
+        }
+
+    # Common colormap range for fair comparison
+    all_imgs = [img_truth, img_blurry, img_reg, img_bsp, img_spec]
+    vmin = min(img.min() for img in all_imgs)
+    vmax = max(img.max() for img in all_imgs)
+
+    # Create figure with GridSpec for flexible layout
+    # Layout: 3 rows
+    # Row 0: 5 small images (truth, input, reg, spectral, bispectrum)
+    # Row 1: Power spectrum (left 3 cols) + Correlation C_ℓ (right 2 cols)
+    # Row 2: Metrics table (left) + Key insight (middle) + Math (right)
+    fig = plt.figure(figsize=(20, 14))
+    gs = fig.add_gridspec(
+        3,
+        5,
+        height_ratios=[0.7, 1.0, 0.7],
+        width_ratios=[1, 1, 1, 1, 1],
+        hspace=0.35,
+        wspace=0.2,
+    )
+
+    # --- Row 0: 5 spatial images ---
+    images = [
+        (img_truth, 'Ground Truth', 'black'),
+        (img_blurry, 'Input (Random Phases)', 'red'),
+        (img_reg, 'Reg. 
Only', 'blue'), + (img_spec, 'Spectral', '#CC6600'), + (img_bsp, 'Bispectrum', 'green'), + ] + for i, (img, title, color) in enumerate(images): + ax = fig.add_subplot(gs[0, i]) + im = ax.imshow( + img, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90] + ) + ax.set_title(title, fontweight='bold', fontsize=10, color=color) + ax.set_xlabel('Lon', fontsize=8) + if i == 0: + ax.set_ylabel('Lat', fontsize=8) + ax.tick_params(labelsize=7) + + # Add colorbar to the right of images + cbar_ax = fig.add_axes([0.92, 0.72, 0.01, 0.15]) + fig.colorbar(im, cax=cbar_ax, label='Amplitude') + + # --- Row 1: Power Spectrum (left 3 cols) + Correlation C_ℓ (right 2 cols) --- + ax_ps = fig.add_subplot(gs[1, 0:3]) + ax_ps.plot( + ell_values, ps_truth, 'k-', linewidth=2, marker='o', markersize=8, label='Ground Truth' + ) + ax_ps.plot(ell_values, ps_blurry, 'r--', linewidth=2, marker='s', markersize=7, label='Input') + ax_ps.plot( + ell_values, + ps_spec, + color='#CC6600', + linestyle='-', + linewidth=2, + marker='x', + markersize=8, + label='Spectral', + ) + ax_ps.plot(ell_values, ps_bsp, 'g-', linewidth=2, marker='d', markersize=7, label='Bispectrum') + ax_ps.set_yscale('log') + ax_ps.set_xlabel('Degree $\\ell$', fontsize=11) + ax_ps.set_ylabel('Power $P_\\ell = \\sum_m |a_{\\ell m}|^2$', fontsize=11) + ax_ps.set_title('Power Spectrum (Amplitude Only)', fontweight='bold', fontsize=11) + ax_ps.legend(loc='upper right', fontsize=9) + ax_ps.grid(True, alpha=0.3) + ax_ps.set_xticks(ell_values) + + # Correlation coefficient C_ℓ (THE KEY METRIC!) 
+ ax_corr = fig.add_subplot(gs[1, 3:5]) + + # Add shaded background regions for visual clarity + ax_corr.axhspan(0.8, 1.15, alpha=0.15, color='green', label='_nolegend_') + ax_corr.axhspan(-0.55, 0.3, alpha=0.1, color='red', label='_nolegend_') + ax_corr.axhspan(0.3, 0.8, alpha=0.08, color='orange', label='_nolegend_') + + # Reference lines + ax_corr.axhline(y=1.0, color='#2E7D32', linestyle='-', linewidth=1.5, alpha=0.6) + ax_corr.axhline(y=0.0, color='#B71C1C', linestyle='-', linewidth=1.5, alpha=0.6) + + # Fill between bispectrum and spectral to highlight the gap + ax_corr.fill_between( + ell_values, corr_spec, corr_bsp, alpha=0.3, color='#4CAF50', label='_nolegend_' + ) + + # Plot lines + ax_corr.plot( + ell_values, + corr_spec, + color='#D84315', + linestyle='-', + linewidth=2.5, + marker='o', + markersize=10, + markerfacecolor='white', + markeredgewidth=2.5, + label='Input / Spectral', + zorder=5, + ) + ax_corr.plot( + ell_values, + corr_bsp, + color='#2E7D32', + linestyle='-', + linewidth=3, + marker='D', + markersize=11, + markerfacecolor='#A5D6A7', + markeredgewidth=2.5, + markeredgecolor='#1B5E20', + label='Bispectrum', + zorder=6, + ) + + ax_corr.set_xlabel('Degree $\\ell$', fontsize=11) + ax_corr.set_ylabel('$C_\\ell$', fontsize=12) + ax_corr.set_title( + 'Correlation Coefficient $C_\\ell$ (Phase Alignment)', fontweight='bold', fontsize=11 + ) + + ax_corr.legend(loc='lower left', fontsize=9, framealpha=0.95, fancybox=True) + ax_corr.grid(True, alpha=0.4, linestyle='--', linewidth=0.8) + ax_corr.set_ylim(-0.55, 1.15) + ax_corr.set_xlim(-0.3, lmax + 0.3) + ax_corr.set_xticks(ell_values) + + # Annotations + ax_corr.annotate( + '$C_\\ell=1$: phases match', + xy=(0.02, 1.05), + fontsize=8, + color='#1B5E20', + fontweight='bold', + ) + ax_corr.annotate( + '$C_\\ell=0$: random', xy=(0.02, 0.05), fontsize=8, color='#B71C1C', fontweight='bold' + ) + + # Arrow showing gap + mid_ell = 4 + gap_y = (corr_bsp[mid_ell] + corr_spec[mid_ell]) / 2 + ax_corr.annotate( + 
'', + xy=(mid_ell, corr_bsp[mid_ell] - 0.02), + xytext=(mid_ell, corr_spec[mid_ell] + 0.02), + arrowprops={'arrowstyle': '<->', 'color': '#1565C0', 'lw': 2.5}, + ) + ax_corr.annotate( + 'Phase\nrecovery!', + xy=(mid_ell + 0.15, gap_y), + fontsize=10, + ha='left', + va='center', + color='#1565C0', + fontweight='bold', + ) + + # --- Row 2: Metrics table + Key insights + C_ℓ interpretation --- + # Left: Metrics table + ax_table = fig.add_subplot(gs[2, 0:2]) + ax_table.axis('off') + + table_data = [ + ['Model', 'Power MSE', 'Bispec MSE', 'Mean $C_\\ell$'], + [ + 'Input', + f'{metrics["Input"]["power_mse"]:.2e}', + f'{metrics["Input"]["bispec_mse"]:.2e}', + f'{metrics["Input"]["mean_corr"]:.3f}', + ], + [ + 'Spectral', + f'{metrics["Spectral"]["power_mse"]:.2e}', + f'{metrics["Spectral"]["bispec_mse"]:.2e}', + f'{metrics["Spectral"]["mean_corr"]:.3f}', + ], + [ + 'Bispectrum', + f'{metrics["Bispectrum"]["power_mse"]:.2e}', + f'{metrics["Bispectrum"]["bispec_mse"]:.2e}', + f'{metrics["Bispectrum"]["mean_corr"]:.3f}', + ], + ] + + # Color cells + cell_colors = [['#E0E0E0'] * 4] # Header + for name in ['Input', 'Spectral', 'Bispectrum']: + row = ['white'] + # Power MSE + row.append('#90EE90' if metrics[name]['power_mse'] < 1e-6 else '#FFB6C1') + # Bispec MSE + row.append('#90EE90' if metrics[name]['bispec_mse'] < 1e-6 else '#FFB6C1') + # Mean C_ℓ + row.append('#90EE90' if metrics[name]['mean_corr'] > 0.8 else '#FFB6C1') + cell_colors.append(row) + + table = ax_table.table( + cellText=table_data, + cellColours=cell_colors, + loc='center', + cellLoc='center', + colWidths=[0.25, 0.25, 0.25, 0.25], + ) + table.auto_set_font_size(False) + table.set_fontsize(10) + table.scale(1.0, 2.0) + ax_table.set_title('Metrics vs Ground Truth', fontweight='bold', fontsize=11, pad=15) + + # Middle: Key insight + ax_insight = fig.add_subplot(gs[2, 2]) + ax_insight.axis('off') + insight_text = ( + 'KEY INSIGHT\n' + '━━━━━━━━━━━━━━━━━━━━━━━\n\n' + 'Spectral constraint:\n' + ' Power MSE → 0 
(perfect!)\n' + ' But Mean $C_\\ell$ ≈ 0.3 (bad)\n\n' + 'Power spectrum = |$a_{\\ell m}$|²\n' + 'discards phase information.\n\n' + 'Same amplitudes,\n' + 'wrong spatial structure!' + ) + ax_insight.text( + 0.5, + 0.5, + insight_text, + transform=ax_insight.transAxes, + fontsize=10, + va='center', + ha='center', + fontfamily='monospace', + bbox={ + 'boxstyle': 'round,pad=0.4', + 'facecolor': '#FFF3E0', + 'edgecolor': '#E65100', + 'linewidth': 2, + }, + ) + + # Right: C_ℓ interpretation + ax_interp = fig.add_subplot(gs[2, 3:5]) + ax_interp.axis('off') + interp_text = ( + 'WHY $C_\\ell$ MATTERS\n' + '━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n' + 'Correlation coefficient:\n' + '$C_\\ell = \\frac{\\sum_m \\hat{a}_{\\ell m} a^*_{\\ell m}}' + '{\\sqrt{\\sum_m |\\hat{a}|^2 \\sum_m |a|^2}}$\n\n' + '$C_\\ell = 1$: phases perfectly aligned\n' + '$C_\\ell = 0$: phases uncorrelated\n\n' + 'Bispectrum encodes phase\n' + 'relationships → recovers $C_\\ell$ ≈ 1\n' + 'even when starting from random!' + ) + ax_interp.text( + 0.5, + 0.5, + interp_text, + transform=ax_interp.transAxes, + fontsize=10, + va='center', + ha='center', + fontfamily='monospace', + bbox={ + 'boxstyle': 'round,pad=0.4', + 'facecolor': '#E8F5E9', + 'edgecolor': '#2E7D32', + 'linewidth': 2, + }, + ) + + plt.suptitle( + 'Bispectrum Recovers Phase Coherence — Proven by Independent Metric $C_\\ell$', + fontsize=14, + fontweight='bold', + y=0.98, + ) + + plt.savefig('deblur_results.png', dpi=150, bbox_inches='tight') + plt.close() + print('\nSaved plot to deblur_results.png') + + # Final summary + print('\n' + '=' * 70) + print('KEY RESULT:') + print('=' * 70) + print('Spectral constraint matches power spectrum perfectly, but C_ℓ drops off') + print(' → Power spectrum only captures |a_ℓm|², losing phase information') + print(' → Result: correct amplitudes, but wrong spatial structure') + print() + print('Bispectrum constraint maintains high C_ℓ across all degrees') + print(' → Bispectrum encodes phase relationships between 
frequencies') + print(' → Result: correct amplitudes AND correct spatial structure') + print('=' * 70) + + +if __name__ == '__main__': + results = run_deblurring_demo() + analyze_results(*results) diff --git a/examples/so3_deblur_demo.py b/examples/so3_deblur_demo.py index bc5ac94..02b4c5d 100644 --- a/examples/so3_deblur_demo.py +++ b/examples/so3_deblur_demo.py @@ -7,6 +7,48 @@ from bispectrum import SO3onS2 +def power_spectrum(coeffs: torch.Tensor) -> torch.Tensor: + """Compute power spectrum from SH coefficients. + + Args: + coeffs: [B, L, M] complex tensor + + Returns: + [B, L] tensor of power per degree + """ + return torch.sum(torch.abs(coeffs) ** 2, dim=-1) + + +def correlation_per_ell(coeffs: torch.Tensor, ref: torch.Tensor, eps: float = 1e-12) -> np.ndarray: + """Compute correlation coefficient C_ℓ between recovered and ground truth coefficients. + + The correlation coefficient measures phase alignment at each degree ℓ: + C_ℓ = Re(∑_m â_ℓm · a*_ℓm) / √(∑_m |â_ℓm|² · ∑_m |a_ℓm|²) + + Args: + coeffs: [B, L, M] recovered complex coefficients + ref: [B, L, M] ground truth complex coefficients + eps: small constant to avoid division by zero + + Returns: + [L] array of correlation coefficients per degree ℓ + C_ℓ = 1.0: perfect phase alignment + C_ℓ = 0.0: random/uncorrelated phases (even if power spectrum matches!) + """ + # Cross-correlation: ∑_m â_ℓm · a*_ℓm + cross = torch.sum(coeffs * ref.conj(), dim=-1) # [B, L] + + # Power of each: ∑_m |a_ℓm|² + power_coeffs = torch.sum(torch.abs(coeffs) ** 2, dim=-1) # [B, L] + power_ref = torch.sum(torch.abs(ref) ** 2, dim=-1) # [B, L] + + # Normalize + denom = torch.sqrt(power_coeffs * power_ref + eps) + corr = (cross.real / denom).squeeze(0) # [L] + + return corr.detach().cpu().numpy() + + def run_deblurring_demo(): """ Demo: Recovering high-frequency structure using bispectrum constraints. @@ -14,15 +56,17 @@ def run_deblurring_demo(): Scenario: We observe a blurry signal and want to reconstruct the sharp original. 
- Model A: Only regularization toward input (no structural prior) - stays blurry - Model B: Regularization + bispectrum constraint - recovers high-frequency structure + - Model C: Regularization + spectral constraint - matches power but NOT phases The bispectrum acts as a "structural fingerprint" that encodes phase relationships - between different frequency components, allowing recovery of detail that pure - smoothness-based methods cannot achieve. + between different frequency components. Model C demonstrates that matching + power spectrum alone is insufficient - you need phase coherence from bispectrum. """ # --- Configuration --- LMAX = 5 # Resolution (keep low for speed) LAMBDA_REG = 0.01 # Regularization toward input (prevents divergence) LAMBDA_BSP = 10.0 # Strength of bispectrum constraint + LAMBDA_SPEC = 10.0 # Strength of spectral (power spectrum) constraint STEPS = 400 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -35,18 +79,27 @@ def run_deblurring_demo(): for l in range(LMAX + 1): truth_coeffs[:, l, :] *= 1.0 / (1.0 + l**2) - # Pre-compute the "Structural Fingerprint" (rotation-invariant bispectrum) - # In practice, this could come from physical constraints or a reference signal + # Pre-compute targets with torch.no_grad(): truth_invariants = bsp_module(truth_coeffs) + truth_power = power_spectrum(truth_coeffs) - # --- 2. Generate "Input" (Blurry Observation) --- - # Heavily dampen high frequencies to simulate degraded observation + # --- 2. Generate "Input" (Corrupted Observation) --- + # For ℓ < 3: keep truth coefficients (low frequencies intact) + # For ℓ >= 3: keep correct AMPLITUDE but RANDOMIZE PHASES + # This simulates a measurement where we know the power spectrum but lost phase info input_coeffs = truth_coeffs.clone().detach() for l in range(3, LMAX + 1): - input_coeffs[:, l, :] *= 0.1 # Kill high frequencies - - # --- 3. Optimization: Regularization Only vs. 
Regularization + Bispectrum --- + # Get the amplitude (magnitude) of each coefficient + amplitude = torch.abs(truth_coeffs[:, l, :]) + # Generate random phases + random_phase = torch.exp(2j * np.pi * torch.rand_like(amplitude, dtype=torch.float32)).to( + device + ) + # Create new coefficients with correct amplitude but random phase + input_coeffs[:, l, :] = amplitude * random_phase + + # --- 3. Optimization: Compare three approaches --- # Neither model has access to ground truth pixels! # Model A: Only stays close to input (no structural guidance) @@ -57,15 +110,19 @@ def run_deblurring_demo(): coeffs_B = input_coeffs.clone().detach().requires_grad_(True) opt_B = optim.Adam([coeffs_B], lr=0.01) + # Model C: Stays close to input + matches power spectrum (spectral loss) + coeffs_C = input_coeffs.clone().detach().requires_grad_(True) + opt_C = optim.Adam([coeffs_C], lr=0.01) + print('Training starts...') print('Model A: Regularization only (no structural prior)') print('Model B: Regularization + Bispectrum constraint') - print('-' * 50) + print('Model C: Regularization + Spectral (power spectrum) constraint') + print('-' * 70) for step in range(STEPS): # --- Model A: Only regularization toward input --- opt_A.zero_grad() - # Just penalize deviation from input - will stay blurry loss_reg_A = torch.mean(torch.abs(coeffs_A - input_coeffs) ** 2) loss_A = loss_reg_A loss_A.backward() @@ -73,27 +130,34 @@ def run_deblurring_demo(): # --- Model B: Regularization + Bispectrum --- opt_B.zero_grad() - - # 1. Regularization: don't deviate too far from input loss_reg_B = torch.mean(torch.abs(coeffs_B - input_coeffs) ** 2) - - # 2. 
Bispectrum constraint: match the structural fingerprint pred_invariants = bsp_module(coeffs_B) loss_bsp = torch.mean(torch.abs(pred_invariants - truth_invariants) ** 2) - loss_B = LAMBDA_REG * loss_reg_B + LAMBDA_BSP * loss_bsp loss_B.backward() opt_B.step() + # --- Model C: Regularization + Spectral (power spectrum) --- + opt_C.zero_grad() + loss_reg_C = torch.mean(torch.abs(coeffs_C - input_coeffs) ** 2) + pred_power = power_spectrum(coeffs_C) + loss_spec = torch.mean((pred_power - truth_power) ** 2) + loss_C = LAMBDA_REG * loss_reg_C + LAMBDA_SPEC * loss_spec + loss_C.backward() + opt_C.step() + if step % 100 == 0: print( - f'Step {step}: Loss A={loss_A.item():.6f} | Loss B={loss_B.item():.6f} (bsp={loss_bsp.item():.6f})' + f'Step {step}: ' + f'Loss A={loss_A.item():.6f} | ' + f'Loss B={loss_B.item():.6f} (bsp={loss_bsp.item():.6f}) | ' + f'Loss C={loss_C.item():.6f} (spec={loss_spec.item():.6f})' ) - print('-' * 50) + print('-' * 70) print('Done!') - return truth_coeffs, input_coeffs, coeffs_A.detach(), coeffs_B.detach() + return truth_coeffs, input_coeffs, coeffs_A.detach(), coeffs_B.detach(), coeffs_C.detach() # --- Spherical Rendering --- @@ -122,90 +186,396 @@ def sh_to_spatial(coeffs: torch.Tensor, nlat: int = 64, nlon: int = 128) -> np.n # --- Analysis Helper --- -def analyze_results(truth, blurry, res_reg_only, res_bispectrum): +def analyze_results( + truth: torch.Tensor, + blurry: torch.Tensor, + res_reg_only: torch.Tensor, + res_bispectrum: torch.Tensor, + res_spectral: torch.Tensor, +) -> None: """ - Combined visualization: spatial images + power spectrum. + Combined visualization: spatial images + power spectrum + correlation coefficient C_ℓ. + + The correlation coefficient C_ℓ is the "honest" metric that proves phase recovery: + - Power spectrum only measures amplitudes |a_ℓm|² + - C_ℓ measures phase alignment: if C_ℓ ≈ 1, phases match ground truth + - Spectral constraint can achieve perfect power match with C_ℓ → 0 (random phases!) 
+ - Bispectrum constraint should achieve C_ℓ → 1 (phase coherence recovered) """ - def get_power_spectrum(coeffs): + def get_power_spectrum(coeffs: torch.Tensor) -> np.ndarray: return torch.sum(torch.abs(coeffs) ** 2, dim=-1).squeeze().detach().cpu().numpy() + # --- Compute Correlation Coefficients C_ℓ --- + print('\nComputing correlation coefficients C_ℓ (phase alignment metric)...') + corr_blurry = correlation_per_ell(blurry, truth) + corr_reg = correlation_per_ell(res_reg_only, truth) + corr_bsp = correlation_per_ell(res_bispectrum, truth) + corr_spec = correlation_per_ell(res_spectral, truth) + + lmax = truth.shape[1] - 1 + ell_values = np.arange(lmax + 1) + + # Print correlation table + print('\n' + '=' * 60) + print('CORRELATION COEFFICIENT C_ℓ vs Ground Truth') + print('(C_ℓ = 1.0: perfect phase match, C_ℓ = 0: random phases)') + print('=' * 60) + print(f'{"ℓ":>3} | {"Blurry":>10} | {"Reg Only":>10} | {"Spectral":>10} | {"Bispectrum":>10}') + print('-' * 60) + for ell in ell_values: + print( + f'{ell:>3} | {corr_blurry[ell]:>10.4f} | {corr_reg[ell]:>10.4f} | ' + f'{corr_spec[ell]:>10.4f} | {corr_bsp[ell]:>10.4f}' + ) + print('=' * 60) + + # Mean correlation (excluding ℓ=0 which is always 1) + print('\nMean C_ℓ (ℓ > 0):') + print(f' Blurry: {np.mean(corr_blurry[1:]):.4f}') + print(f' Reg Only: {np.mean(corr_reg[1:]):.4f}') + print(f' Spectral: {np.mean(corr_spec[1:]):.4f}') + print(f' Bispectrum: {np.mean(corr_bsp[1:]):.4f}') + # Render all signals to spatial domain - print('Rendering spherical harmonics to spatial domain...') + print('\nRendering spherical harmonics to spatial domain...') img_truth = sh_to_spatial(truth) img_blurry = sh_to_spatial(blurry) img_reg = sh_to_spatial(res_reg_only) img_bsp = sh_to_spatial(res_bispectrum) + img_spec = sh_to_spatial(res_spectral) - # Compute power spectra + # Compute power spectra for plotting ps_truth = get_power_spectrum(truth) ps_blurry = get_power_spectrum(blurry) - ps_reg = get_power_spectrum(res_reg_only) + 
get_power_spectrum(res_reg_only) ps_bsp = get_power_spectrum(res_bispectrum) + ps_spec = get_power_spectrum(res_spectral) + + # --- Compute MSE metrics for table --- + device = truth.device + bsp_module = SO3onS2(lmax=lmax).to(device) + + def power_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float: + ps1 = power_spectrum(coeffs) + ps2 = power_spectrum(ref) + return torch.mean((ps1 - ps2) ** 2).item() + + def bispec_mse(coeffs: torch.Tensor, ref: torch.Tensor) -> float: + with torch.no_grad(): + inv1 = bsp_module(coeffs) + inv2 = bsp_module(ref) + return torch.mean(torch.abs(inv1 - inv2) ** 2).item() + + # Compute metrics + metrics: dict[str, dict[str, float]] = {} + models_for_metrics = [ + ('Input', blurry), + ('Reg. Only', res_reg_only), + ('Spectral', res_spectral), + ('Bispectrum', res_bispectrum), + ] + for name, coeffs in models_for_metrics: + metrics[name] = { + 'power_mse': power_mse(coeffs, truth), + 'bispec_mse': bispec_mse(coeffs, truth), + 'mean_corr': float(np.mean(correlation_per_ell(coeffs, truth)[1:])), + } # Common colormap range for fair comparison - vmin = min(img_truth.min(), img_blurry.min(), img_reg.min(), img_bsp.min()) - vmax = max(img_truth.max(), img_blurry.max(), img_reg.max(), img_bsp.max()) - - # Create combined figure - fig = plt.figure(figsize=(14, 10)) - - # Top row: spatial images - ax1 = fig.add_subplot(2, 3, 1) - im1 = ax1.imshow( - img_truth, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90] - ) - ax1.set_title('Ground Truth (Sharp)', fontweight='bold') - ax1.set_xlabel('Longitude') - ax1.set_ylabel('Latitude') - - ax2 = fig.add_subplot(2, 3, 2) - ax2.imshow( - img_blurry, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90] - ) - ax2.set_title('Input (Blurry)', fontweight='bold') - ax2.set_xlabel('Longitude') - - ax3 = fig.add_subplot(2, 3, 4) - ax3.imshow( - img_reg, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90] - ) - 
ax3.set_title('Regularization Only', fontweight='bold')
-    ax3.set_xlabel('Longitude')
-    ax3.set_ylabel('Latitude')
-
-    ax4 = fig.add_subplot(2, 3, 5)
-    ax4.imshow(
-        img_bsp, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90]
-    )
-    ax4.set_title('Bispectrum Constraint', fontweight='bold', color='green')
-    ax4.set_xlabel('Longitude')
-
-    # Add colorbar
-    cbar_ax = fig.add_axes([0.02, 0.15, 0.01, 0.7])
-    fig.colorbar(im1, cax=cbar_ax)
-
-    # Right column: power spectrum
-    ax5 = fig.add_subplot(1, 3, 3)
-    ax5.plot(ps_truth, 'k-', linewidth=2, marker='o', label='Ground Truth')
-    ax5.plot(ps_blurry, 'r--', linewidth=2, marker='s', label='Input (Blurry)')
-    ax5.plot(ps_reg, 'b-', linewidth=1.5, marker='^', label='Reg. Only')
-    ax5.plot(ps_bsp, 'g-', linewidth=2, marker='d', label='Bispectrum')
-    ax5.set_yscale('log')
-    ax5.set_xlabel('Degree l')
-    ax5.set_ylabel('Power')
-    ax5.set_title('Power Spectrum')
-    ax5.legend(loc='upper right', fontsize=9)
-    ax5.grid(True, alpha=0.3)
+    all_imgs = [img_truth, img_blurry, img_reg, img_bsp, img_spec]
+    vmin = min(img.min() for img in all_imgs)
+    vmax = max(img.max() for img in all_imgs)
+
+    # Create figure with GridSpec for flexible layout
+    # Layout: 3 rows
+    # Row 0: 5 small images (truth, input, reg, spectral, bispectrum)
+    # Row 1: Power spectrum (left 3 cols) + Correlation C_ℓ (right 2 cols)
+    # Row 2: Metrics table (left) + Key insight (middle) + Math (right)
+    fig = plt.figure(figsize=(20, 14))
+    gs = fig.add_gridspec(
+        3,
+        5,
+        height_ratios=[0.7, 1.0, 0.7],
+        width_ratios=[1, 1, 1, 1, 1],
+        hspace=0.35,
+        wspace=0.2,
+    )
+
+    # --- Row 0: 5 spatial images ---
+    images = [
+        (img_truth, 'Ground Truth', 'black'),
+        (img_blurry, 'Input (Random Phases)', 'red'),
+        (img_reg, 'Reg. 
Only', 'blue'), + (img_spec, 'Spectral', '#CC6600'), + (img_bsp, 'Bispectrum', 'green'), + ] + for i, (img, title, color) in enumerate(images): + ax = fig.add_subplot(gs[0, i]) + im = ax.imshow( + img, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto', extent=[0, 360, -90, 90] + ) + ax.set_title(title, fontweight='bold', fontsize=10, color=color) + ax.set_xlabel('Lon', fontsize=8) + if i == 0: + ax.set_ylabel('Lat', fontsize=8) + ax.tick_params(labelsize=7) + + # Add colorbar to the right of images + cbar_ax = fig.add_axes([0.92, 0.72, 0.01, 0.15]) + fig.colorbar(im, cax=cbar_ax, label='Amplitude') + + # --- Row 1: Power Spectrum (left 3 cols) + Correlation C_ℓ (right 2 cols) --- + ax_ps = fig.add_subplot(gs[1, 0:3]) + ax_ps.plot( + ell_values, ps_truth, 'k-', linewidth=2, marker='o', markersize=8, label='Ground Truth' + ) + ax_ps.plot(ell_values, ps_blurry, 'r--', linewidth=2, marker='s', markersize=7, label='Input') + ax_ps.plot( + ell_values, + ps_spec, + color='#CC6600', + linestyle='-', + linewidth=2, + marker='x', + markersize=8, + label='Spectral', + ) + ax_ps.plot(ell_values, ps_bsp, 'g-', linewidth=2, marker='d', markersize=7, label='Bispectrum') + ax_ps.set_yscale('log') + ax_ps.set_xlabel('Degree $\\ell$', fontsize=11) + ax_ps.set_ylabel('Power $P_\\ell = \\sum_m |a_{\\ell m}|^2$', fontsize=11) + ax_ps.set_title('Power Spectrum (Amplitude Only)', fontweight='bold', fontsize=11) + ax_ps.legend(loc='upper right', fontsize=9) + ax_ps.grid(True, alpha=0.3) + ax_ps.set_xticks(ell_values) + + # Correlation coefficient C_ℓ (THE KEY METRIC!) 
+ ax_corr = fig.add_subplot(gs[1, 3:5]) + + # Add shaded background regions for visual clarity + ax_corr.axhspan(0.8, 1.15, alpha=0.15, color='green', label='_nolegend_') + ax_corr.axhspan(-0.55, 0.3, alpha=0.1, color='red', label='_nolegend_') + ax_corr.axhspan(0.3, 0.8, alpha=0.08, color='orange', label='_nolegend_') + + # Reference lines + ax_corr.axhline(y=1.0, color='#2E7D32', linestyle='-', linewidth=1.5, alpha=0.6) + ax_corr.axhline(y=0.0, color='#B71C1C', linestyle='-', linewidth=1.5, alpha=0.6) + + # Fill between bispectrum and spectral to highlight the gap + ax_corr.fill_between( + ell_values, corr_spec, corr_bsp, alpha=0.3, color='#4CAF50', label='_nolegend_' + ) + + # Plot lines + ax_corr.plot( + ell_values, + corr_spec, + color='#D84315', + linestyle='-', + linewidth=2.5, + marker='o', + markersize=10, + markerfacecolor='white', + markeredgewidth=2.5, + label='Input / Spectral', + zorder=5, + ) + ax_corr.plot( + ell_values, + corr_bsp, + color='#2E7D32', + linestyle='-', + linewidth=3, + marker='D', + markersize=11, + markerfacecolor='#A5D6A7', + markeredgewidth=2.5, + markeredgecolor='#1B5E20', + label='Bispectrum', + zorder=6, + ) + + ax_corr.set_xlabel('Degree $\\ell$', fontsize=11) + ax_corr.set_ylabel('$C_\\ell$', fontsize=12) + ax_corr.set_title( + 'Correlation Coefficient $C_\\ell$ (Phase Alignment)', fontweight='bold', fontsize=11 + ) + + ax_corr.legend(loc='lower left', fontsize=9, framealpha=0.95, fancybox=True) + ax_corr.grid(True, alpha=0.4, linestyle='--', linewidth=0.8) + ax_corr.set_ylim(-0.55, 1.15) + ax_corr.set_xlim(-0.3, lmax + 0.3) + ax_corr.set_xticks(ell_values) + + # Annotations + ax_corr.annotate( + '$C_\\ell=1$: phases match', + xy=(0.02, 1.05), + fontsize=8, + color='#1B5E20', + fontweight='bold', + ) + ax_corr.annotate( + '$C_\\ell=0$: random', xy=(0.02, 0.05), fontsize=8, color='#B71C1C', fontweight='bold' + ) + + # Arrow showing gap + mid_ell = 4 + gap_y = (corr_bsp[mid_ell] + corr_spec[mid_ell]) / 2 + ax_corr.annotate( + 
'', + xy=(mid_ell, corr_bsp[mid_ell] - 0.02), + xytext=(mid_ell, corr_spec[mid_ell] + 0.02), + arrowprops={'arrowstyle': '<->', 'color': '#1565C0', 'lw': 2.5}, + ) + ax_corr.annotate( + 'Phase\nrecovery!', + xy=(mid_ell + 0.15, gap_y), + fontsize=10, + ha='left', + va='center', + color='#1565C0', + fontweight='bold', + ) + + # --- Row 2: Metrics table + Key insights + C_ℓ interpretation --- + # Left: Metrics table + ax_table = fig.add_subplot(gs[2, 0:2]) + ax_table.axis('off') + + table_data = [ + ['Model', 'Power MSE', 'Bispec MSE', 'Mean $C_\\ell$'], + [ + 'Input', + f'{metrics["Input"]["power_mse"]:.2e}', + f'{metrics["Input"]["bispec_mse"]:.2e}', + f'{metrics["Input"]["mean_corr"]:.3f}', + ], + [ + 'Spectral', + f'{metrics["Spectral"]["power_mse"]:.2e}', + f'{metrics["Spectral"]["bispec_mse"]:.2e}', + f'{metrics["Spectral"]["mean_corr"]:.3f}', + ], + [ + 'Bispectrum', + f'{metrics["Bispectrum"]["power_mse"]:.2e}', + f'{metrics["Bispectrum"]["bispec_mse"]:.2e}', + f'{metrics["Bispectrum"]["mean_corr"]:.3f}', + ], + ] + + # Color cells + cell_colors = [['#E0E0E0'] * 4] # Header + for name in ['Input', 'Spectral', 'Bispectrum']: + row = ['white'] + # Power MSE + row.append('#90EE90' if metrics[name]['power_mse'] < 1e-6 else '#FFB6C1') + # Bispec MSE + row.append('#90EE90' if metrics[name]['bispec_mse'] < 1e-6 else '#FFB6C1') + # Mean C_ℓ + row.append('#90EE90' if metrics[name]['mean_corr'] > 0.8 else '#FFB6C1') + cell_colors.append(row) + + table = ax_table.table( + cellText=table_data, + cellColours=cell_colors, + loc='center', + cellLoc='center', + colWidths=[0.25, 0.25, 0.25, 0.25], + ) + table.auto_set_font_size(False) + table.set_fontsize(10) + table.scale(1.0, 2.0) + ax_table.set_title('Metrics vs Ground Truth', fontweight='bold', fontsize=11, pad=15) + + # Middle: Key insight + ax_insight = fig.add_subplot(gs[2, 2]) + ax_insight.axis('off') + insight_text = ( + 'KEY INSIGHT\n' + '━━━━━━━━━━━━━━━━━━━━━━━\n\n' + 'Spectral constraint:\n' + ' Power MSE → 0 
(perfect!)\n' + ' But Mean $C_\\ell$ ≈ 0.3 (bad)\n\n' + 'Power spectrum = |$a_{\\ell m}$|²\n' + 'discards phase information.\n\n' + 'Same amplitudes,\n' + 'wrong spatial structure!' + ) + ax_insight.text( + 0.5, + 0.5, + insight_text, + transform=ax_insight.transAxes, + fontsize=10, + va='center', + ha='center', + fontfamily='monospace', + bbox={ + 'boxstyle': 'round,pad=0.4', + 'facecolor': '#FFF3E0', + 'edgecolor': '#E65100', + 'linewidth': 2, + }, + ) + + # Right: C_ℓ interpretation + ax_interp = fig.add_subplot(gs[2, 3:5]) + ax_interp.axis('off') + interp_text = ( + 'WHY $C_\\ell$ MATTERS\n' + '━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n' + 'Correlation coefficient:\n' + '$C_\\ell = \\frac{\\sum_m \\hat{a}_{\\ell m} a^*_{\\ell m}}' + '{\\sqrt{\\sum_m |\\hat{a}|^2 \\sum_m |a|^2}}$\n\n' + '$C_\\ell = 1$: phases perfectly aligned\n' + '$C_\\ell = 0$: phases uncorrelated\n\n' + 'Bispectrum encodes phase\n' + 'relationships → recovers $C_\\ell$ ≈ 1\n' + 'even when starting from random!' + ) + ax_interp.text( + 0.5, + 0.5, + interp_text, + transform=ax_interp.transAxes, + fontsize=10, + va='center', + ha='center', + fontfamily='monospace', + bbox={ + 'boxstyle': 'round,pad=0.4', + 'facecolor': '#E8F5E9', + 'edgecolor': '#2E7D32', + 'linewidth': 2, + }, + ) plt.suptitle( - 'High-Frequency Recovery via Bispectrum Constraint', fontsize=14, fontweight='bold' + 'Bispectrum Recovers Phase Coherence — Proven by Independent Metric $C_\\ell$', + fontsize=14, + fontweight='bold', + y=0.98, ) - plt.tight_layout(rect=[0.04, 0, 1, 0.96]) plt.savefig('deblur_results.png', dpi=150, bbox_inches='tight') plt.close() - print('Saved plot to deblur_results.png') + print('\nSaved plot to deblur_results.png') + + # Final summary + print('\n' + '=' * 70) + print('KEY RESULT:') + print('=' * 70) + print('Spectral constraint matches power spectrum perfectly, but C_ℓ drops off') + print(' → Power spectrum only captures |a_ℓm|², losing phase information') + print(' → Result: correct amplitudes, but wrong 
spatial structure') + print() + print('Bispectrum constraint maintains high C_ℓ across all degrees') + print(' → Bispectrum encodes phase relationships between frequencies') + print(' → Result: correct amplitudes AND correct spatial structure') + print('=' * 70) if __name__ == '__main__': From 864ae3eb30b9eb3cbd47bede8bcd71f9a51beb3c Mon Sep 17 00:00:00 2001 From: Johan Mathe Date: Tue, 20 Jan 2026 23:47:23 -0800 Subject: [PATCH 5/6] Add koopman demo --- examples/toy_koopman_bispectrum.py | 789 +++++++++++++++++++++++++++++ 1 file changed, 789 insertions(+) create mode 100644 examples/toy_koopman_bispectrum.py diff --git a/examples/toy_koopman_bispectrum.py b/examples/toy_koopman_bispectrum.py new file mode 100644 index 0000000..adcc05f --- /dev/null +++ b/examples/toy_koopman_bispectrum.py @@ -0,0 +1,789 @@ +"""Toy Koopman bispectrum demo: learning continuous-time dynamics on the sphere. + +This example demonstrates: +1. Simulating the advected Swift-Hohenberg equation on S²: + ∂_t f = r*f - (1 + ∇²)²f - f³ + Ω ∂_φ f + - Pattern formation from random noise + - Rotation via exact spectral advection (i*m*Ω term) + - All derivatives computed spectrally for numerical stability +2. Building lifted features: Φ(f) = [SH coeffs; λ * bispectrum(coeffs)] +3. Learning a Koopman generator L from coarse snapshots +4. Producing substep predictions via exp(L*j*δ)Φ_t +5. 
3D sphere visualization of rotating patterns + +Run from repo root: + python examples/toy_koopman_bispectrum.py + # or + python -m examples.toy_koopman_bispectrum +""" + +from __future__ import annotations + +import subprocess +from collections.abc import Callable +from pathlib import Path +from typing import TYPE_CHECKING + +import matplotlib.pyplot as plt +import numpy as np +import torch +from torch_harmonics import InverseRealSHT, RealSHT + +from bispectrum import SO3onS2 + +if TYPE_CHECKING: + pass + +# ============================================================================= +# Hyperparameters +# ============================================================================= +NLAT = 32 # Latitude points +NLON = 64 # Longitude points +LMAX = 5 # Max SH degree (CG limit) +# Advected Swift-Hohenberg parameters +DT_FINE = 5e-4 # Time step (larger is safe with spectral methods) +DT_COARSE = 0.1 # Snapshot interval +T_TOTAL = 5.0 # Total simulation time +R_PARAM = 1.0 # Instability parameter (strength of pattern growth) +L0_TARGET = 2 # Target degree for instability (l=2 modes grow) +OMEGA = 2.0 # Rotation speed (radians per unit time) +LAM = 0.1 # Bispectrum weight in lift +GAMMA = 1e-4 # Frobenius regularization +LR = 0.01 # Adam learning rate +TRAIN_STEPS = 500 # Koopman training iterations +NUM_SUBSTEPS = 10 # Substeps between coarse snapshots +EPS = 0.1 # sinθ clamping (larger to avoid pole instabilities) + + +# ============================================================================= +# Grid Construction +# ============================================================================= +def make_sphere_grid( + nlat: int, nlon: int, device: torch.device +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, float, float]: + """Create equiangular grid on the sphere matching torch-harmonics convention. 
+ + Args: + nlat: Number of latitude points + nlon: Number of longitude points + device: Torch device + + Returns: + theta: (nlat, 1) colatitude values in [0, pi] + phi: (1, nlon) longitude values in [0, 2pi) + sin_theta: (nlat, 1) sin(theta) clamped for safety + dtheta: Grid spacing in theta + dphi: Grid spacing in phi + """ + # Match torch-harmonics equiangular grid convention + theta_1d = torch.linspace(0, np.pi, nlat, device=device, dtype=torch.float64) + phi_1d = torch.linspace(0, 2 * np.pi, nlon + 1, device=device, dtype=torch.float64)[:-1] + + theta = theta_1d.unsqueeze(1) # (nlat, 1) + phi = phi_1d.unsqueeze(0) # (1, nlon) + + # sin(theta) with clamping for numerical safety at poles (theta=0 or pi) + sin_theta = torch.clamp(torch.sin(theta), min=EPS) + + dtheta = np.pi / (nlat - 1) # Note: nlat-1 intervals + dphi = 2 * np.pi / nlon + + return theta, phi, sin_theta, dtheta, dphi + + +# ============================================================================= +# Advected Swift-Hohenberg PDE (Rotating Pattern Formation) +# Equation: ∂t f = r*f - (l₀(l₀+1) + ∇²)² f - f³ + Ω ∂_φ f +# ============================================================================= +def advected_sh_rhs( + coeffs: torch.Tensor, + sht: RealSHT, + isht: InverseRealSHT, + l_lap: torch.Tensor, + m_vec: torch.Tensor, + r_param: float = 1.0, + l0_target: int = 2, + omega: float = 2.0, +) -> torch.Tensor: + """Compute RHS for advected Swift-Hohenberg in spectral space. + + Equation: ∂t f = r*f - (l₀(l₀+1) + ∇²)² f - f³ + Ω ∂_φ f + + This targets l=l0_target modes for instability. The operator becomes: + r - (l₀(l₀+1) - l(l+1))² + which is maximized (= r) when l = l0_target. + + The rotation term Ω ∂_φ f is computed exactly in spectral space as (i*m*Ω)*f_lm. 
+ + Args: + coeffs: (1, L, M) complex SH coefficients + sht: Forward spherical harmonic transform + isht: Inverse spherical harmonic transform + l_lap: (L, M) precomputed Laplacian eigenvalues [-l(l+1)] + m_vec: (L, M) precomputed m indices for rotation + r_param: Instability parameter (positive = patterns form) + l0_target: Target degree for instability (default l=2) + omega: Rotation speed (radians per unit time) + + Returns: + (1, L, M) RHS in spectral space + """ + # Target eigenvalue for instability + k0_sq = l0_target * (l0_target + 1) # For l=2: k0_sq = 6 + + # 1. Linear pattern term (spectral) + # Operator: r - (k0² + ∇²)² = r - (k0² - l(l+1))² + # l_lap = -l(l+1), so k0² + l_lap = k0² - l(l+1) + linear_op = r_param - (k0_sq + l_lap) ** 2 + term_pattern = linear_op * coeffs + + # 2. Advection/rotation term (spectral) + # ∂_φ corresponds to multiplying by i*m + term_advection = 1j * m_vec * omega * coeffs + + # 3. Nonlinear term: -f³ (must compute in grid space) + f_grid = isht(coeffs) + nonlinear_grid = -(f_grid**3) + term_nonlinear = sht(nonlinear_grid) + + return term_pattern + term_advection + term_nonlinear + + +def rk4_step_spectral( + coeffs: torch.Tensor, + dt: float, + rhs_fn: Callable[[torch.Tensor], torch.Tensor], +) -> torch.Tensor: + """Single RK4 time step in spectral space. + + Args: + coeffs: Current SH coefficients + dt: Time step size + rhs_fn: Function computing the RHS in spectral space + + Returns: + Updated SH coefficients + """ + k1 = rhs_fn(coeffs) + k2 = rhs_fn(coeffs + 0.5 * dt * k1) + k3 = rhs_fn(coeffs + 0.5 * dt * k2) + k4 = rhs_fn(coeffs + dt * k3) + return coeffs + (dt / 6.0) * (k1 + 2 * k2 + 2 * k3 + k4) + + +# ============================================================================= +# SHT Wrappers +# ============================================================================= +def grid_to_sh(f: torch.Tensor, sht: RealSHT) -> torch.Tensor: + """Convert grid field to spherical harmonic coefficients. 
+ + Args: + f: (nlat, nlon) real field + sht: RealSHT transform object + + Returns: + (1, lmax+1, mmax+1) complex SH coefficients + """ + return sht(f.unsqueeze(0).double()) + + +def sh_to_grid(coeffs: torch.Tensor, isht: InverseRealSHT) -> torch.Tensor: + """Convert SH coefficients to grid field. + + Args: + coeffs: (1, lmax+1, mmax+1) complex SH coefficients + isht: InverseRealSHT transform object + + Returns: + (nlat, nlon) real field + """ + return isht(coeffs).squeeze(0) + + +# ============================================================================= +# Lift Construction +# ============================================================================= +def build_lift( + coeffs: torch.Tensor, + bsp_module: SO3onS2, + lam: float, +) -> torch.Tensor: + """Build lifted feature vector from SH coefficients. + + Φ(f) = [Re(coeffs_flat); Im(coeffs_flat); λ*Re(bsp); λ*Im(bsp)] + + Args: + coeffs: (1, lmax+1, mmax+1) complex SH coefficients + bsp_module: SO3onS2 bispectrum module + lam: Weight for bispectrum features + + Returns: + (N,) real feature vector + """ + # Flatten SH coefficients + c_flat = coeffs.flatten() # complex + sh_part = torch.cat([c_flat.real, c_flat.imag]) + + # Compute bispectrum + bsp = bsp_module(coeffs) # (1, bsp_size) complex + bsp_flat = bsp.flatten() + bsp_part = lam * torch.cat([bsp_flat.real, bsp_flat.imag]) + + return torch.cat([sh_part, bsp_part]) + + +def lift_to_sh( + lift: torch.Tensor, + lmax: int, + mmax: int, +) -> torch.Tensor: + """Extract SH coefficients from lift vector (for decoding). 
+ + Args: + lift: (N,) feature vector + lmax: Maximum l degree + mmax: Maximum m degree + + Returns: + (1, lmax+1, mmax+1) complex SH coefficients + """ + num_sh = (lmax + 1) * (mmax + 1) + real_part = lift[:num_sh] + imag_part = lift[num_sh : 2 * num_sh] + coeffs = torch.complex(real_part, imag_part) + return coeffs.reshape(1, lmax + 1, mmax + 1) + + +# ============================================================================= +# Koopman Training +# ============================================================================= +def train_koopman_generator( + lifts: list[torch.Tensor], + dt_coarse: float, + gamma: float, + lr: float, + steps: int, + device: torch.device, +) -> torch.Tensor: + """Learn Koopman generator L via gradient descent. + + Minimizes: ||Φ_{t+Δ} - exp(L*Δ)Φ_t||² + γ||L||_F² + + Args: + lifts: List of (N,) lift vectors at consecutive coarse times + dt_coarse: Time interval between consecutive lifts + gamma: Frobenius regularization weight + lr: Learning rate + steps: Number of optimization steps + device: Torch device + + Returns: + (N, N) learned generator matrix + """ + N = lifts[0].shape[0] + L = torch.zeros(N, N, device=device, dtype=torch.float32, requires_grad=True) + optimizer = torch.optim.Adam([L], lr=lr) + + # Stack pairs: (Phi_t, Phi_{t+dt}) + Phi_t = torch.stack([lift.float() for lift in lifts[:-1]]) # (T-1, N) + Phi_next = torch.stack([lift.float() for lift in lifts[1:]]) # (T-1, N) + + initial_loss = None + for step in range(steps): + optimizer.zero_grad() + + # exp(L*dt) @ Phi_t^T -> (N, T-1), then transpose + expLdt = torch.matrix_exp(L * dt_coarse) + pred = (expLdt @ Phi_t.T).T # (T-1, N) + + loss_fit = torch.mean((pred - Phi_next) ** 2) + loss_reg = gamma * torch.sum(L**2) + loss = loss_fit + loss_reg + + if initial_loss is None: + initial_loss = loss.item() + + loss.backward() + optimizer.step() + + if step % 50 == 0 or step == steps - 1: + print(f' Step {step:4d}: loss={loss.item():.6e} (fit={loss_fit.item():.6e})') + + 
final_loss = loss.item() + print(f' Training complete: initial={initial_loss:.6e} -> final={final_loss:.6e}') + if final_loss > initial_loss / 10: + print(' Warning: Loss did not decrease by 10x. Consider more steps or tuning.') + + return L.detach() + + +def koopman_predict(phi_t: torch.Tensor, L: torch.Tensor, dt: float) -> torch.Tensor: + """Predict lifted state at time t+dt using Koopman generator. + + Args: + phi_t: (N,) current lift vector + L: (N, N) generator matrix + dt: Time step + + Returns: + (N,) predicted lift vector + """ + expLdt = torch.matrix_exp(L * dt) + return expLdt @ phi_t + + +def substep_rollout( + phi_start: torch.Tensor, + L: torch.Tensor, + dt_coarse: float, + num_substeps: int, +) -> list[torch.Tensor]: + """Generate substep predictions between coarse snapshots. + + Args: + phi_start: (N,) starting lift vector + L: (N, N) generator matrix + dt_coarse: Coarse time interval + num_substeps: Number of substeps + + Returns: + List of (N,) lift vectors at substep times + """ + delta = dt_coarse / num_substeps + preds = [] + for j in range(num_substeps + 1): + expLj = torch.matrix_exp(L * j * delta) + phi_j = expLj @ phi_start + preds.append(phi_j) + return preds + + +# ============================================================================= +# Visualization +# ============================================================================= +def plot_sphere( + field: np.ndarray, + ax: plt.Axes, + title: str, + vmin: float, + vmax: float, + nlat: int, + nlon: int, +) -> None: + """Plot field on a 3D sphere. 
+ + Args: + field: (nlat, nlon) field values + ax: Matplotlib 3D axes + title: Plot title + vmin, vmax: Colorbar limits + nlat, nlon: Grid dimensions + """ + # Create sphere coordinates + theta_1d = np.linspace(0, np.pi, nlat) + phi_1d = np.linspace(0, 2 * np.pi, nlon) + phi_grid, theta_grid = np.meshgrid(phi_1d, theta_1d) + + # Convert to Cartesian + x = np.sin(theta_grid) * np.cos(phi_grid) + y = np.sin(theta_grid) * np.sin(phi_grid) + z = np.cos(theta_grid) + + # Normalize field to [0, 1] for colormap + norm_field = (field - vmin) / (vmax - vmin + 1e-10) + norm_field = np.clip(norm_field, 0, 1) + + # Get colors from colormap + cmap = plt.cm.RdBu_r + colors = cmap(norm_field) + + # Plot surface + ax.plot_surface(x, y, z, facecolors=colors, rstride=1, cstride=1, shade=False) + ax.set_title(title, fontsize=10) + ax.set_xlim([-1.1, 1.1]) + ax.set_ylim([-1.1, 1.1]) + ax.set_zlim([-1.1, 1.1]) + ax.set_box_aspect([1, 1, 1]) + ax.axis('off') + + +def plot_comparison_sphere( + truth: np.ndarray, + pred: np.ndarray, + t: float, + out_path: Path, + vmin: float | None = None, + vmax: float | None = None, +) -> None: + """Create 3-panel comparison plot with spheres: truth | predicted | error. 
+ + Args: + truth: (nlat, nlon) ground truth field + pred: (nlat, nlon) predicted field + t: Time value for title + out_path: Output file path + vmin, vmax: Colorbar limits (computed from data if None) + """ + error = pred - truth + mse = np.mean(error**2) + nlat, nlon = truth.shape + + if vmin is None: + vmin = min(truth.min(), pred.min()) + if vmax is None: + vmax = max(truth.max(), pred.max()) + + err_abs = max(np.abs(error).max(), 1e-10) + + # Create figure with 3D subplots + fig = plt.figure(figsize=(16, 5), dpi=100) + + # Truth sphere + ax1 = fig.add_subplot(131, projection='3d') + plot_sphere(truth, ax1, f'Truth (t={t:.4f})', vmin, vmax, nlat, nlon) + + # Prediction sphere + ax2 = fig.add_subplot(132, projection='3d') + plot_sphere(pred, ax2, 'Koopman Prediction', vmin, vmax, nlat, nlon) + + # Error sphere + ax3 = fig.add_subplot(133, projection='3d') + plot_sphere(error, ax3, f'Error (MSE={mse:.2e})', -err_abs, err_abs, nlat, nlon) + + # Add colorbar + sm = plt.cm.ScalarMappable(cmap='RdBu_r', norm=plt.Normalize(vmin=vmin, vmax=vmax)) + sm.set_array([]) + cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7]) + fig.colorbar(sm, cax=cbar_ax, label='Field value') + + plt.suptitle(f'Spherical PDE Evolution (t={t:.4f})', fontsize=12, y=0.98) + plt.savefig(out_path, dpi=150, bbox_inches='tight') + plt.close() + + +def plot_comparison( + truth: np.ndarray, + pred: np.ndarray, + t: float, + out_path: Path, + vmin: float | None = None, + vmax: float | None = None, +) -> None: + """Create 3-panel comparison plot: truth | predicted | error. 
+ + Args: + truth: (nlat, nlon) ground truth field + pred: (nlat, nlon) predicted field + t: Time value for title + out_path: Output file path + vmin, vmax: Colorbar limits (computed from data if None) + """ + error = pred - truth + mse = np.mean(error**2) + + if vmin is None: + vmin = min(truth.min(), pred.min()) + if vmax is None: + vmax = max(truth.max(), pred.max()) + + # Use figsize that produces even pixel dimensions for ffmpeg + fig, axes = plt.subplots(1, 3, figsize=(15, 4), dpi=100) + + im0 = axes[0].imshow(truth, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto') + axes[0].set_title(f'Truth (t={t:.4f})') + axes[0].set_xlabel('Longitude') + axes[0].set_ylabel('Latitude') + + im1 = axes[1].imshow(pred, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto') + axes[1].set_title('Koopman Prediction') + axes[1].set_xlabel('Longitude') + + err_abs = np.abs(error).max() + im2 = axes[2].imshow(error, cmap='RdBu_r', vmin=-err_abs, vmax=err_abs, aspect='auto') + axes[2].set_title(f'Error (MSE={mse:.2e})') + axes[2].set_xlabel('Longitude') + + fig.colorbar(im0, ax=axes[0], shrink=0.8) + fig.colorbar(im1, ax=axes[1], shrink=0.8) + fig.colorbar(im2, ax=axes[2], shrink=0.8) + + plt.tight_layout() + plt.savefig(out_path, dpi=150, bbox_inches='tight') + plt.close() + + +def save_animation_frames( + truth_fields: list[np.ndarray], + pred_fields: list[np.ndarray], + times: list[float], + frames_dir: Path, + use_sphere: bool = True, +) -> tuple[float, float]: + """Save individual PNG frames for animation. 
+ + Args: + truth_fields: List of (nlat, nlon) ground truth fields + pred_fields: List of (nlat, nlon) predicted fields + times: List of time values + frames_dir: Directory to save frames + use_sphere: If True, use 3D sphere visualization + + Returns: + (vmin, vmax) global colorbar limits used + """ + frames_dir.mkdir(parents=True, exist_ok=True) + + # Compute global colorbar limits + all_data = truth_fields + pred_fields + vmin = min(f.min() for f in all_data) + vmax = max(f.max() for f in all_data) + + plot_fn = plot_comparison_sphere if use_sphere else plot_comparison + + for i, (truth, pred, t) in enumerate(zip(truth_fields, pred_fields, times, strict=True)): + frame_path = frames_dir / f'frame_{i:04d}.png' + plot_fn(truth, pred, t, frame_path, vmin=vmin, vmax=vmax) + + print(f'Saved {len(times)} frames to {frames_dir}') + return vmin, vmax + + +def run_ffmpeg(frames_dir: Path, out_mp4: Path) -> bool: + """Stitch frames into MP4 using ffmpeg. + + Args: + frames_dir: Directory containing frame_XXXX.png files + out_mp4: Output MP4 file path + + Returns: + True if successful, False otherwise + """ + # Use -vf pad to ensure even dimensions (required by libx264) + cmd = [ + 'ffmpeg', + '-y', + '-framerate', + '10', + '-i', + str(frames_dir / 'frame_%04d.png'), + '-vf', + 'pad=ceil(iw/2)*2:ceil(ih/2)*2', + '-c:v', + 'libx264', + '-pix_fmt', + 'yuv420p', + str(out_mp4), + ] + try: + subprocess.run(cmd, check=True, capture_output=True) + print(f'Saved animation to {out_mp4}') + return True + except FileNotFoundError: + print(f'ffmpeg not found. 
Frames saved to {frames_dir}') + return False + except subprocess.CalledProcessError as e: + print(f'ffmpeg failed: {e.stderr.decode() if e.stderr else "unknown error"}') + print(f'Frames saved to {frames_dir}') + return False + + +# ============================================================================= +# Main +# ============================================================================= +def main() -> None: + """Run the toy Koopman bispectrum demo.""" + print('=' * 70) + print('Toy Koopman Bispectrum Demo') + print('=' * 70) + + # Device selection + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + print(f'Device: {device}') + + # Output directory + out_dir = Path('outputs') + out_dir.mkdir(exist_ok=True) + frames_dir = out_dir / 'frames' + + # --- Setup --- + print('\n[1] Setting up grid and transforms...') + theta, phi, sin_theta, dtheta, dphi = make_sphere_grid(NLAT, NLON, device) + + # SHT transforms + # torch-harmonics uses lmax/mmax as the number of modes (0 to lmax-1) + # So for max degree LMAX, we need lmax=LMAX+1 + sht = RealSHT(NLAT, NLON, lmax=LMAX + 1, mmax=LMAX + 1, grid='equiangular', norm='ortho') + isht = InverseRealSHT(NLAT, NLON, lmax=LMAX + 1, mmax=LMAX + 1, grid='equiangular', norm='ortho') + sht = sht.to(device).double() + isht = isht.to(device).double() + + # Precompute Laplacian eigenvalues: l_lap[l, m] = -l(l+1) + l_indices = torch.arange(LMAX + 1, device=device, dtype=torch.float64) + l_lap = -l_indices * (l_indices + 1) + l_lap = l_lap.view(-1, 1).expand(-1, LMAX + 1) # (L, M) + + # Precompute m-values for rotation: m_vec[l, m] = m + m_indices = torch.arange(LMAX + 1, device=device, dtype=torch.float64) + m_vec = m_indices.view(1, -1).expand(LMAX + 1, -1) # (L, M) + + # Bispectrum module + bsp_module = SO3onS2(lmax=LMAX).to(device) + print(f' Bispectrum output size: {bsp_module.output_size}') + + # --- Validation: SHT round-trip --- + print('\n[2] Validating SHT round-trip...') + # Use function within 
bandwidth (l <= LMAX) + f_test = (torch.cos(theta) + 0.3 * torch.sin(theta) ** 2 * torch.cos(2 * phi)).squeeze().double() + coeffs_test = grid_to_sh(f_test, sht) + f_recon = sh_to_grid(coeffs_test, isht).double() + rel_error = torch.norm(f_recon - f_test) / torch.norm(f_test) + print(f' Relative error: {rel_error.item():.2e}') + if rel_error > 1e-4: + print(' Warning: SHT round-trip error is high!') + + # --- Initial condition --- + print('\n[3] Setting initial condition (random noise for pattern formation)...') + # Swift-Hohenberg grows patterns from small random perturbations + torch.manual_seed(42) + f0_grid = 0.2 * torch.randn(NLAT, NLON, device=device, dtype=torch.float64) + + # Convert to spectral space (simulation happens in spectral space) + coeffs = grid_to_sh(f0_grid, sht) + f0_grid = sh_to_grid(coeffs, isht) # Bandlimited version + print(f' f0 shape: {f0_grid.shape}, range: [{f0_grid.min().item():.3f}, {f0_grid.max().item():.3f}]') + + # --- PDE simulation --- + print('\n[4] Simulating advected Swift-Hohenberg equation...') + n_coarse = int(T_TOTAL / DT_COARSE) + 1 + steps_per_coarse = int(DT_COARSE / DT_FINE) + + def rhs_fn(c: torch.Tensor) -> torch.Tensor: + return advected_sh_rhs(c, sht, isht, l_lap, m_vec, R_PARAM, L0_TARGET, OMEGA) + + # Collect snapshots (store grid fields for visualization) + snapshots: list[torch.Tensor] = [f0_grid.clone()] + coarse_times: list[float] = [0.0] + + # Time stepping in spectral space + for i in range(1, n_coarse): + # Fine time stepping (RK4 in spectral space) + for _ in range(steps_per_coarse): + coeffs = rk4_step_spectral(coeffs, DT_FINE, rhs_fn) + + # Convert to grid for snapshot storage and monitoring + f_grid = sh_to_grid(coeffs, isht) + f_max = torch.abs(f_grid).max().item() + + # Check for blowup + if f_max > 100: + print(f' ERROR: PDE blew up at t={i * DT_COARSE:.3f}, max|f|={f_max:.1f}') + return + + snapshots.append(f_grid.clone()) + coarse_times.append(i * DT_COARSE) + + if i % 5 == 0: + print(f' 
t={coarse_times[-1]:.2f}, max|f|={f_max:.3f}') + + print(f' Collected {len(snapshots)} coarse snapshots') + + # --- Build lifts --- + print('\n[5] Building lifted feature vectors...') + lifts: list[torch.Tensor] = [] + for snap in snapshots: + coeffs = grid_to_sh(snap.double(), sht) + lift = build_lift(coeffs, bsp_module, LAM) + lifts.append(lift) + + lift_dim = lifts[0].shape[0] + print(f' Lift dimension: {lift_dim}') + + # --- Train Koopman generator --- + print('\n[6] Training Koopman generator L...') + L = train_koopman_generator(lifts, DT_COARSE, GAMMA, LR, TRAIN_STEPS, device) + print(f' L shape: {L.shape}, ||L||_F = {torch.norm(L).item():.4f}') + + # --- Substep predictions --- + print('\n[7] Generating substep predictions...') + # Pick a coarse interval in the middle for visualization + mid_idx = len(snapshots) // 2 + phi_start = lifts[mid_idx].float() + + substep_lifts = substep_rollout(phi_start, L, DT_COARSE, NUM_SUBSTEPS) + + # Decode lifts to spatial fields + truth_fields: list[np.ndarray] = [] + pred_fields: list[np.ndarray] = [] + substep_times: list[float] = [] + + t_start = coarse_times[mid_idx] + delta = DT_COARSE / NUM_SUBSTEPS + + for j, lift_pred in enumerate(substep_lifts): + # Predicted field + coeffs_pred = lift_to_sh(lift_pred, LMAX, LMAX) + f_pred = sh_to_grid(coeffs_pred, isht).detach().cpu().numpy() + pred_fields.append(f_pred) + + # True field: simulate from mid_idx snapshot in spectral space + t_sub = t_start + j * delta + substep_times.append(t_sub) + + # Get true field at this substep time + coeffs_sub = grid_to_sh(snapshots[mid_idx], sht) + n_fine_steps = int(j * delta / DT_FINE) + for _ in range(n_fine_steps): + coeffs_sub = rk4_step_spectral(coeffs_sub, DT_FINE, rhs_fn) + f_true = sh_to_grid(coeffs_sub, isht) + truth_fields.append(f_true.detach().cpu().numpy()) + + # --- Validation: intermediate prediction quality --- + print('\n[8] Validating intermediate predictions...') + mid_sub_idx = NUM_SUBSTEPS // 2 + truth_mid = 
truth_fields[mid_sub_idx] + pred_mid = pred_fields[mid_sub_idx] + mse_koopman = np.mean((pred_mid - truth_mid) ** 2) + + # Linear interpolation baseline + f_start_np = snapshots[mid_idx].detach().cpu().numpy() + f_end_np = snapshots[mid_idx + 1].detach().cpu().numpy() + f_linear = 0.5 * (f_start_np + f_end_np) + mse_linear = np.mean((f_linear - truth_mid) ** 2) + + print(f' Midpoint MSE (Koopman): {mse_koopman:.6e}') + print(f' Midpoint MSE (linear): {mse_linear:.6e}') + if mse_koopman < mse_linear: + print(' Koopman beats linear interpolation!') + else: + print(' Note: Linear interpolation is competitive (normal for smooth dynamics)') + + # --- Static comparison plot --- + print('\n[9] Saving static comparison plot (sphere)...') + static_path = out_dir / f'comparison_t{substep_times[mid_sub_idx]:.4f}.png' + plot_comparison_sphere(truth_mid, pred_mid, substep_times[mid_sub_idx], static_path) + print(f' Saved: {static_path}') + + # --- Animation frames --- + print('\n[10] Saving animation frames...') + save_animation_frames(truth_fields, pred_fields, substep_times, frames_dir) + + # --- Create MP4 --- + print('\n[11] Creating MP4 animation...') + mp4_path = out_dir / 'koopman_animation.mp4' + run_ffmpeg(frames_dir, mp4_path) + + # --- Summary --- + print('\n' + '=' * 70) + print('SUMMARY') + print('=' * 70) + print(f' Outputs saved to: {out_dir.absolute()}') + print(f' - Static plot: {static_path.name}') + print(f' - Frames: {frames_dir.name}/') + print(f' - Animation: {mp4_path.name}') + print(f' Lift dimension: {lift_dim}') + print(f' Koopman generator: {L.shape[0]}x{L.shape[1]}') + print('=' * 70) + + +if __name__ == '__main__': + main() From b8cf26af5c4ac9eb9207bdfaa42fb4bb90484238 Mon Sep 17 00:00:00 2001 From: Johan Mathe Date: Thu, 22 Jan 2026 14:42:57 -0800 Subject: [PATCH 6/6] Update toy bispectrum --- examples/toy_koopman_bispectrum.py | 562 +++++++++++++++++++++++++---- 1 file changed, 485 insertions(+), 77 deletions(-) diff --git 
a/examples/toy_koopman_bispectrum.py b/examples/toy_koopman_bispectrum.py index adcc05f..5362e94 100644 --- a/examples/toy_koopman_bispectrum.py +++ b/examples/toy_koopman_bispectrum.py @@ -19,7 +19,7 @@ from __future__ import annotations -import subprocess +import subprocess # nosec B404 - used for local ffmpeg invocation with fixed args from collections.abc import Callable from pathlib import Path from typing import TYPE_CHECKING @@ -42,17 +42,19 @@ LMAX = 5 # Max SH degree (CG limit) # Advected Swift-Hohenberg parameters DT_FINE = 5e-4 # Time step (larger is safe with spectral methods) -DT_COARSE = 0.1 # Snapshot interval -T_TOTAL = 5.0 # Total simulation time +DT_COARSE = 0.05 # Snapshot interval (reduced for more training pairs) +T_TOTAL = 8.0 # Total simulation time (includes growth + saturation) +T_TRAIN_START = 3.5 # Start training from saturation regime (patterns formed) R_PARAM = 1.0 # Instability parameter (strength of pattern growth) L0_TARGET = 2 # Target degree for instability (l=2 modes grow) OMEGA = 2.0 # Rotation speed (radians per unit time) -LAM = 0.1 # Bispectrum weight in lift -GAMMA = 1e-4 # Frobenius regularization +LAM = 1.0 # Bispectrum weight in lift (equal weight to SH coefficients) +GAMMA = 1e-3 # Frobenius regularization LR = 0.01 # Adam learning rate TRAIN_STEPS = 500 # Koopman training iterations NUM_SUBSTEPS = 10 # Substeps between coarse snapshots EPS = 0.1 # sinθ clamping (larger to avoid pole instabilities) +PCA_DIM = 50 # Dimension for PCA projection of full lift (reduces overfitting) # ============================================================================= @@ -232,6 +234,21 @@ def build_lift( return torch.cat([sh_part, bsp_part]) +def build_lift_no_bispectrum(coeffs: torch.Tensor) -> torch.Tensor: + """Build lifted feature vector using ONLY SH coefficients (no bispectrum). + + This serves as a baseline to compare against the full bispectrum lift. 
+ + Args: + coeffs: (1, L, M) complex SH coefficients + + Returns: + (2*L*M,) real feature vector [Re(coeffs), Im(coeffs)] + """ + c_flat = coeffs.flatten() # complex + return torch.cat([c_flat.real, c_flat.imag]) + + def lift_to_sh( lift: torch.Tensor, lmax: int, @@ -264,6 +281,7 @@ def train_koopman_generator( lr: float, steps: int, device: torch.device, + antisymmetric: bool = True, ) -> torch.Tensor: """Learn Koopman generator L via gradient descent. @@ -276,13 +294,21 @@ def train_koopman_generator( lr: Learning rate steps: Number of optimization steps device: Torch device + antisymmetric: If True, constrain L to be antisymmetric (L = -L^T), + which guarantees bounded (oscillatory) dynamics with no growth/decay. Returns: (N, N) learned generator matrix """ N = lifts[0].shape[0] - L = torch.zeros(N, N, device=device, dtype=torch.float32, requires_grad=True) - optimizer = torch.optim.Adam([L], lr=lr) + + if antisymmetric: + # Parameterize L = A - A^T (antisymmetric, eigenvalues are purely imaginary) + A = torch.zeros(N, N, device=device, dtype=torch.float32, requires_grad=True) + optimizer = torch.optim.Adam([A], lr=lr) + else: + L_raw = torch.zeros(N, N, device=device, dtype=torch.float32, requires_grad=True) + optimizer = torch.optim.Adam([L_raw], lr=lr) # Stack pairs: (Phi_t, Phi_{t+dt}) Phi_t = torch.stack([lift.float() for lift in lifts[:-1]]) # (T-1, N) @@ -292,6 +318,12 @@ def train_koopman_generator( for step in range(steps): optimizer.zero_grad() + # Construct L (antisymmetric if requested) + if antisymmetric: + L = A - A.T + else: + L = L_raw + # exp(L*dt) @ Phi_t^T -> (N, T-1), then transpose expLdt = torch.matrix_exp(L * dt_coarse) pred = (expLdt @ Phi_t.T).T # (T-1, N) @@ -309,12 +341,18 @@ def train_koopman_generator( if step % 50 == 0 or step == steps - 1: print(f' Step {step:4d}: loss={loss.item():.6e} (fit={loss_fit.item():.6e})') + # Final L + if antisymmetric: + L = (A - A.T).detach() + else: + L = L_raw.detach() + final_loss = loss.item() 
print(f' Training complete: initial={initial_loss:.6e} -> final={final_loss:.6e}') if final_loss > initial_loss / 10: print(' Warning: Loss did not decrease by 10x. Consider more steps or tuning.') - return L.detach() + return L def koopman_predict(phi_t: torch.Tensor, L: torch.Tensor, dt: float) -> torch.Tensor: @@ -548,6 +586,287 @@ def save_animation_frames( return vmin, vmax +def save_animation_frames_enhanced( + truth_fields: list[np.ndarray], + pred_fields: list[np.ndarray], + pred_fields_baseline: list[np.ndarray], + times: list[float], + frames_dir: Path, + lift_dim_pca: int, + lift_dim_full: int, + lift_dim_baseline: int, + pca_var_ratio: float, + lmax: int, +) -> tuple[float, float]: + """Save enhanced animation frames with metrics panels and explanations. + + Args: + truth_fields: List of (nlat, nlon) ground truth fields + pred_fields: List of (nlat, nlon) predicted fields (with bispectrum + PCA) + pred_fields_baseline: List of (nlat, nlon) baseline predictions (no bispectrum) + times: List of time values + frames_dir: Directory to save frames + lift_dim_pca: Dimension after PCA projection + lift_dim_full: Original full lift dimension (before PCA) + lift_dim_baseline: Dimension of the baseline lift (SH only) + pca_var_ratio: Percentage of variance explained by PCA + lmax: Maximum spherical harmonic degree + + Returns: + (vmin, vmax) global colorbar limits used + """ + frames_dir.mkdir(parents=True, exist_ok=True) + + # Compute global colorbar limits + all_data = truth_fields + pred_fields + pred_fields_baseline + vmin = min(f.min() for f in all_data) + vmax = max(f.max() for f in all_data) + + # Precompute all MSEs and errors for the time series plot + mses = [] + mses_baseline = [] + max_vals_truth = [] + max_vals_pred = [] + all_errors = [] + all_errors_baseline = [] + for truth, pred, pred_base in zip( + truth_fields, pred_fields, pred_fields_baseline, strict=True + ): + error = pred - truth + error_base = pred_base - truth + 
mses.append(np.mean(error**2)) + mses_baseline.append(np.mean(error_base**2)) + max_vals_truth.append(np.abs(truth).max()) + max_vals_pred.append(np.abs(pred).max()) + all_errors.append(error) + all_errors_baseline.append(error_base) + + # Global error scale for consistent normalization across all frames + global_err_max = max( + max(np.abs(e).max() for e in all_errors), + max(np.abs(e).max() for e in all_errors_baseline), + 1e-10, + ) + + nlat, nlon = truth_fields[0].shape + + for i, (truth, pred, pred_base, t) in enumerate( + zip(truth_fields, pred_fields, pred_fields_baseline, times, strict=True) + ): + frame_path = frames_dir / f'frame_{i:04d}.png' + _plot_frame_enhanced( + truth, + pred, + pred_base, + t, + times, + mses, + mses_baseline, + max_vals_truth, + max_vals_pred, + i, + frame_path, + vmin, + vmax, + global_err_max, + nlat, + nlon, + lift_dim_pca, + lift_dim_full, + lift_dim_baseline, + pca_var_ratio, + lmax, + ) + + print(f'Saved {len(times)} frames to {frames_dir}') + return vmin, vmax + + +def _plot_frame_enhanced( + truth: np.ndarray, + pred: np.ndarray, + pred_baseline: np.ndarray, + t: float, + all_times: list[float], + all_mses: list[float], + all_mses_baseline: list[float], + max_vals_truth: list[float], + max_vals_pred: list[float], + frame_idx: int, + out_path: Path, + vmin: float, + vmax: float, + global_err_max: float, + nlat: int, + nlon: int, + lift_dim_pca: int, + lift_dim_full: int, + lift_dim_baseline: int, + pca_var_ratio: float, + lmax: int, +) -> None: + """Plot a single enhanced frame with spheres, metrics, and explanation.""" + error = pred - truth + error_baseline = pred_baseline - truth + mse = all_mses[frame_idx] + mse_baseline = all_mses_baseline[frame_idx] + # Use global error scale for consistent normalization across all frames + err_abs = global_err_max + + # Create figure with GridSpec for complex layout (3 rows) + fig = plt.figure(figsize=(22, 14), dpi=100) + + # Row 1: Truth + With Bispectrum + Error (with bisp) + 
ax_truth = fig.add_subplot(3, 4, 1, projection='3d') + ax_pred = fig.add_subplot(3, 4, 2, projection='3d') + ax_error = fig.add_subplot(3, 4, 3, projection='3d') + + # Row 1: Baseline (no bisp) + Error (no bisp) + ax_pred_base = fig.add_subplot(3, 4, 5, projection='3d') + ax_error_base = fig.add_subplot(3, 4, 6, projection='3d') + + # Plot spheres - Row 1 + plot_sphere(truth, ax_truth, 'Ground Truth (PDE)', vmin, vmax, nlat, nlon) + plot_sphere(pred, ax_pred, f'Bispectrum+PCA (MSE={mse:.2e})', vmin, vmax, nlat, nlon) + plot_sphere(error, ax_error, 'Error (Bisp+PCA)', -err_abs, err_abs, nlat, nlon) + + # Plot spheres - Row 2 (baseline) + plot_sphere( + pred_baseline, ax_pred_base, f'SH Only (MSE={mse_baseline:.2e})', vmin, vmax, nlat, nlon + ) + plot_sphere(error_baseline, ax_error_base, 'Error (SH Only)', -err_abs, err_abs, nlat, nlon) + + # Top right: Model explanation + ax_text = fig.add_subplot(3, 4, 4) + ax_text.axis('off') + improvement = (mse_baseline - mse) / mse_baseline * 100 if mse_baseline > 0 else 0 + + # Compute irrep stats for lmax + n_sh_modes = (lmax + 1) ** 2 # Total SH modes (all m) + n_l_pairs = (lmax + 1) * (lmax + 2) // 2 # (l1, l2) pairs with l1 <= l2 + n_bisp = lift_dim_full - lift_dim_baseline # Bispectrum features (real+imag) + + explanation = ( + r'$\bf{SO(3)\ Bispectrum\ on\ S^2}$' + '\n\n' + r'$\bf{PDE:}$ Advected Swift-Hohenberg' + '\n' + r'$\partial_t f = rf - (1+\nabla^2)^2 f - f^3 + \Omega \partial_\phi f$' + '\n\n' + r'$\bf{Irrep\ Statistics:}$' + '\n' + f'• $\\ell_{{max}}$={lmax}, SH modes: {n_sh_modes}\n' + f'• $(\\ell_1,\\ell_2)$ pairs: {n_l_pairs}\n' + f'• Bispectrum values: {n_bisp // 2} complex\n' + f'• Improvement: {improvement:.1f}%' + ) + ax_text.text( + 0.05, + 0.95, + explanation, + transform=ax_text.transAxes, + fontsize=11, + verticalalignment='top', + fontfamily='monospace', + bbox={'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.8}, + ) + + # Middle right: Method comparison box + ax_method = fig.add_subplot(3, 
4, 8) + ax_method.axis('off') + method_text = ( + r'$\bf{SO(3)\ Bispectrum:}$' + '\n' + r'$B_{\ell_1\ell_2}^\ell = (F_{\ell_1} \otimes F_{\ell_2}) \cdot C_{\ell_1\ell_2}^\ell \cdot F_\ell^*$' + '\n' + ' Clebsch-Gordan coupling\n' + ' SO(3)-invariant features\n' + '\n' + r'$\bf{Lift\ Dimensions:}$' + '\n' + f' Full: {lift_dim_full}→{lift_dim_pca} (PCA)\n' + f' Baseline: {lift_dim_baseline} (SH only)\n' + '\n' + r'$\bf{Koopman:}$ $\Phi_{t+\Delta} = e^{L\Delta} \Phi_t$' + '\n' + ' L: antisymmetric generator\n' + ' One-step predictions' + ) + ax_method.text( + 0.05, + 0.95, + method_text, + transform=ax_method.transAxes, + fontsize=10, + verticalalignment='top', + fontfamily='monospace', + bbox={'boxstyle': 'round', 'facecolor': 'lightblue', 'alpha': 0.8}, + ) + + # Bottom row: MSE comparison plot + ax_mse = fig.add_subplot(3, 4, 9) + ax_mse.plot(all_times, all_mses, 'b-', linewidth=2.5, label='With Bispectrum') + ax_mse.plot(all_times, all_mses_baseline, 'r--', linewidth=2.5, label='No Bispectrum') + ax_mse.axvline(t, color='gray', linestyle=':', linewidth=2) + ax_mse.scatter([t], [mse], color='b', s=120, zorder=5, edgecolors='white', linewidth=2) + ax_mse.scatter( + [t], [mse_baseline], color='r', s=120, zorder=5, edgecolors='white', linewidth=2 + ) + ax_mse.set_xlabel('Time', fontsize=12) + ax_mse.set_ylabel('MSE', fontsize=12) + ax_mse.set_title('MSE Comparison: Bispectrum vs Baseline', fontsize=12, fontweight='bold') + ax_mse.legend(loc='upper right', fontsize=10) + ax_mse.grid(True, alpha=0.3) + ax_mse.set_xlim(all_times[0], all_times[-1]) + ax_mse.set_ylim(0, max(max(all_mses), max(all_mses_baseline)) * 1.2) + + # Bottom: Improvement over time + ax_imp = fig.add_subplot(3, 4, 10) + improvements = [ + (mb - m) / mb * 100 if mb > 0 else 0 + for m, mb in zip(all_mses, all_mses_baseline, strict=True) + ] + ax_imp.fill_between(all_times, improvements, alpha=0.3, color='green') + ax_imp.plot(all_times, improvements, 'g-', linewidth=2.5) + ax_imp.axvline(t, 
color='gray', linestyle=':', linewidth=2) + ax_imp.scatter( + [t], [improvement], color='green', s=120, zorder=5, edgecolors='white', linewidth=2 + ) + ax_imp.axhline(0, color='black', linestyle='-', linewidth=0.5) + ax_imp.set_xlabel('Time', fontsize=12) + ax_imp.set_ylabel('Improvement (%)', fontsize=12) + ax_imp.set_title('Bispectrum Improvement Over Baseline', fontsize=12, fontweight='bold') + ax_imp.grid(True, alpha=0.3) + ax_imp.set_xlim(all_times[0], all_times[-1]) + + # Bottom: Flat projections + ax_flat_truth = fig.add_subplot(3, 4, 11) + im_flat = ax_flat_truth.imshow(truth, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto') + ax_flat_truth.set_title('Truth (Mercator)', fontsize=11) + ax_flat_truth.set_xlabel('Longitude') + ax_flat_truth.set_ylabel('Latitude') + fig.colorbar(im_flat, ax=ax_flat_truth, shrink=0.7) + + ax_flat_pred = fig.add_subplot(3, 4, 12) + im_flat2 = ax_flat_pred.imshow(pred, cmap='RdBu_r', vmin=vmin, vmax=vmax, aspect='auto') + ax_flat_pred.set_title('With Bispectrum (Mercator)', fontsize=11) + ax_flat_pred.set_xlabel('Longitude') + fig.colorbar(im_flat2, ax=ax_flat_pred, shrink=0.7) + + # Main title + plt.suptitle( + f'Koopman with SO(3) Bispectrum on S² | t = {t:.3f} | ' + f'Bisp MSE: {mse:.2e} | Baseline MSE: {mse_baseline:.2e}', + fontsize=14, + fontweight='bold', + y=0.99, + ) + + plt.tight_layout(rect=[0, 0, 1, 0.97]) + plt.savefig(out_path, dpi=100, bbox_inches='tight', facecolor='white') + plt.close() + + def run_ffmpeg(frames_dir: Path, out_mp4: Path) -> bool: """Stitch frames into MP4 using ffmpeg. 
@@ -575,7 +894,7 @@ def run_ffmpeg(frames_dir: Path, out_mp4: Path) -> bool: str(out_mp4), ] try: - subprocess.run(cmd, check=True, capture_output=True) + subprocess.run(cmd, check=True, capture_output=True) # nosec B603 - cmd is fixed print(f'Saved animation to {out_mp4}') return True except FileNotFoundError: @@ -593,7 +912,7 @@ def run_ffmpeg(frames_dir: Path, out_mp4: Path) -> bool: def main() -> None: """Run the toy Koopman bispectrum demo.""" print('=' * 70) - print('Toy Koopman Bispectrum Demo') + print('Koopman with SO(3) Bispectrum on S² Demo') print('=' * 70) # Device selection @@ -613,7 +932,9 @@ def main() -> None: # torch-harmonics uses lmax/mmax as the number of modes (0 to lmax-1) # So for max degree LMAX, we need lmax=LMAX+1 sht = RealSHT(NLAT, NLON, lmax=LMAX + 1, mmax=LMAX + 1, grid='equiangular', norm='ortho') - isht = InverseRealSHT(NLAT, NLON, lmax=LMAX + 1, mmax=LMAX + 1, grid='equiangular', norm='ortho') + isht = InverseRealSHT( + NLAT, NLON, lmax=LMAX + 1, mmax=LMAX + 1, grid='equiangular', norm='ortho' + ) sht = sht.to(device).double() isht = isht.to(device).double() @@ -633,7 +954,9 @@ def main() -> None: # --- Validation: SHT round-trip --- print('\n[2] Validating SHT round-trip...') # Use function within bandwidth (l <= LMAX) - f_test = (torch.cos(theta) + 0.3 * torch.sin(theta) ** 2 * torch.cos(2 * phi)).squeeze().double() + f_test = ( + (torch.cos(theta) + 0.3 * torch.sin(theta) ** 2 * torch.cos(2 * phi)).squeeze().double() + ) coeffs_test = grid_to_sh(f_test, sht) f_recon = sh_to_grid(coeffs_test, isht).double() rel_error = torch.norm(f_recon - f_test) / torch.norm(f_test) @@ -650,7 +973,9 @@ def main() -> None: # Convert to spectral space (simulation happens in spectral space) coeffs = grid_to_sh(f0_grid, sht) f0_grid = sh_to_grid(coeffs, isht) # Bandlimited version - print(f' f0 shape: {f0_grid.shape}, range: [{f0_grid.min().item():.3f}, {f0_grid.max().item():.3f}]') + print( + f' f0 shape: {f0_grid.shape}, range: 
[{f0_grid.min().item():.3f}, {f0_grid.max().item():.3f}]' + ) # --- PDE simulation --- print('\n[4] Simulating advected Swift-Hohenberg equation...') @@ -687,85 +1012,168 @@ def rhs_fn(c: torch.Tensor) -> torch.Tensor: print(f' Collected {len(snapshots)} coarse snapshots') - # --- Build lifts --- + # --- Build lifts (with bispectrum and baseline without) --- print('\n[5] Building lifted feature vectors...') + + # Full lift: SH + bispectrum lifts: list[torch.Tensor] = [] + # Baseline lift: SH only (no bispectrum) + lifts_baseline: list[torch.Tensor] = [] + for snap in snapshots: coeffs = grid_to_sh(snap.double(), sht) lift = build_lift(coeffs, bsp_module, LAM) lifts.append(lift) - - lift_dim = lifts[0].shape[0] - print(f' Lift dimension: {lift_dim}') - - # --- Train Koopman generator --- - print('\n[6] Training Koopman generator L...') - L = train_koopman_generator(lifts, DT_COARSE, GAMMA, LR, TRAIN_STEPS, device) - print(f' L shape: {L.shape}, ||L||_F = {torch.norm(L).item():.4f}') - - # --- Substep predictions --- - print('\n[7] Generating substep predictions...') - # Pick a coarse interval in the middle for visualization - mid_idx = len(snapshots) // 2 - phi_start = lifts[mid_idx].float() - - substep_lifts = substep_rollout(phi_start, L, DT_COARSE, NUM_SUBSTEPS) - - # Decode lifts to spatial fields + lift_baseline = build_lift_no_bispectrum(coeffs) + lifts_baseline.append(lift_baseline) + + lift_dim_full = lifts[0].shape[0] + lift_dim_baseline = lifts_baseline[0].shape[0] + print(f' Full lift dimension (SH + Bispectrum): {lift_dim_full}') + print(f' Baseline lift dimension (SH only): {lift_dim_baseline}') + + # --- PCA projection for full lifts (reduces overfitting) --- + print('\n[5b] Applying PCA to full lifts...') + lifts_stack = torch.stack(lifts, dim=0).float() # [N, lift_dim_full] + + # Center the data + lift_mean = lifts_stack.mean(dim=0) + lifts_centered = lifts_stack - lift_mean + + # Compute PCA via SVD + U, S, Vh = torch.linalg.svd(lifts_centered, 
full_matrices=False) + # Keep top PCA_DIM components + pca_dim = min(PCA_DIM, lift_dim_full, len(lifts)) + V_pca = Vh[:pca_dim, :].T # [lift_dim_full, pca_dim] + + # Project lifts to PCA space + lifts_pca = [((lift.float() - lift_mean) @ V_pca) for lift in lifts] + + # Compute variance explained + total_var = (S**2).sum().item() + explained_var = (S[:pca_dim] ** 2).sum().item() + var_ratio = explained_var / total_var * 100 + + print(f' PCA: {lift_dim_full} -> {pca_dim} dims ({var_ratio:.1f}% variance explained)') + lift_dim = pca_dim # Use PCA dimension for Koopman + + # --- Train Koopman generators (on saturated regime only) --- + print('\n[6] Training Koopman generators...') + # Find index where training starts (after pattern saturation) + train_start_idx = int(T_TRAIN_START / DT_COARSE) + lifts_train = lifts_pca[train_start_idx:] # Use PCA-projected lifts + lifts_train_baseline = lifts_baseline[train_start_idx:] + print(f' Training on saturated regime: t >= {T_TRAIN_START} ({len(lifts_train)} snapshots)') + + print(' [6a] Training WITH bispectrum (PCA-projected)...') + L = train_koopman_generator(lifts_train, DT_COARSE, GAMMA, LR, TRAIN_STEPS, device) + print(f' L shape: {L.shape}, ||L||_F = {torch.norm(L).item():.4f}') + + print(' [6b] Training WITHOUT bispectrum (baseline)...') + L_baseline = train_koopman_generator( + lifts_train_baseline, DT_COARSE, GAMMA, LR, TRAIN_STEPS, device + ) + print( + f' L_baseline shape: {L_baseline.shape}, ||L||_F = {torch.norm(L_baseline).item():.4f}' + ) + + # --- Generate predictions for saturated timeline --- + print('\n[7] Generating Koopman predictions for saturated regime...') + + # Generate predictions using ONE-STEP Koopman from TRUE lifts + # This shows model accuracy without error compounding truth_fields: list[np.ndarray] = [] - pred_fields: list[np.ndarray] = [] - substep_times: list[float] = [] - - t_start = coarse_times[mid_idx] - delta = DT_COARSE / NUM_SUBSTEPS - - for j, lift_pred in enumerate(substep_lifts): - 
# Predicted field - coeffs_pred = lift_to_sh(lift_pred, LMAX, LMAX) + pred_fields: list[np.ndarray] = [] # With bispectrum (PCA) + pred_fields_baseline: list[np.ndarray] = [] # Without bispectrum + frame_times: list[float] = [] + + # Precompute exp(L * dt) for one-step prediction + expLdt = torch.matrix_exp(L * DT_COARSE) + expLdt_baseline = torch.matrix_exp(L_baseline * DT_COARSE) + + # Work with saturated regime snapshots only + snapshots_sat = snapshots[train_start_idx:] + times_sat = coarse_times[train_start_idx:] + lifts_sat_pca = lifts_pca[train_start_idx:] # PCA-projected full lifts + lifts_sat_baseline = lifts_baseline[train_start_idx:] + + for i, (snap, t) in enumerate(zip(snapshots_sat, times_sat, strict=True)): + # Truth: directly from simulation + truth_fields.append(snap.detach().cpu().numpy()) + frame_times.append(t) + + # Prediction WITH bispectrum (PCA): one-step from TRUE previous lift + if i == 0: + pred_lift_pca = lifts_sat_pca[0].float() + pred_lift_baseline = lifts_sat_baseline[0].float() + else: + # Predict from TRUE previous lift (in PCA space) + prev_lift_pca = lifts_sat_pca[i - 1].float() + pred_lift_pca = expLdt @ prev_lift_pca + + prev_lift_baseline = lifts_sat_baseline[i - 1].float() + pred_lift_baseline = expLdt_baseline @ prev_lift_baseline + + # Decode full prediction: PCA space -> original space -> SH coeffs + # Project back from PCA: pred_lift_full = pred_lift_pca @ V_pca.T + lift_mean + pred_lift_full = pred_lift_pca @ V_pca.T + lift_mean + coeffs_pred = lift_to_sh(pred_lift_full, LMAX, LMAX) f_pred = sh_to_grid(coeffs_pred, isht).detach().cpu().numpy() pred_fields.append(f_pred) - # True field: simulate from mid_idx snapshot in spectral space - t_sub = t_start + j * delta - substep_times.append(t_sub) - - # Get true field at this substep time - coeffs_sub = grid_to_sh(snapshots[mid_idx], sht) - n_fine_steps = int(j * delta / DT_FINE) - for _ in range(n_fine_steps): - coeffs_sub = rk4_step_spectral(coeffs_sub, DT_FINE, rhs_fn) - 
f_true = sh_to_grid(coeffs_sub, isht) - truth_fields.append(f_true.detach().cpu().numpy()) - - # --- Validation: intermediate prediction quality --- - print('\n[8] Validating intermediate predictions...') - mid_sub_idx = NUM_SUBSTEPS // 2 - truth_mid = truth_fields[mid_sub_idx] - pred_mid = pred_fields[mid_sub_idx] - mse_koopman = np.mean((pred_mid - truth_mid) ** 2) - - # Linear interpolation baseline - f_start_np = snapshots[mid_idx].detach().cpu().numpy() - f_end_np = snapshots[mid_idx + 1].detach().cpu().numpy() - f_linear = 0.5 * (f_start_np + f_end_np) - mse_linear = np.mean((f_linear - truth_mid) ** 2) - - print(f' Midpoint MSE (Koopman): {mse_koopman:.6e}') - print(f' Midpoint MSE (linear): {mse_linear:.6e}') - if mse_koopman < mse_linear: - print(' Koopman beats linear interpolation!') - else: - print(' Note: Linear interpolation is competitive (normal for smooth dynamics)') + # Decode baseline prediction + coeffs_pred_baseline = lift_to_sh(pred_lift_baseline, LMAX, LMAX) + f_pred_baseline = sh_to_grid(coeffs_pred_baseline, isht).detach().cpu().numpy() + pred_fields_baseline.append(f_pred_baseline) + + print( + f' Generated {len(frame_times)} frames from t={frame_times[0]:.2f} to t={frame_times[-1]:.2f}' + ) + + # --- Validation: prediction quality over time --- + print('\n[8] Validating predictions...') + mid_idx = len(frame_times) // 2 + + # With bispectrum + truth_mid = truth_fields[mid_idx] + pred_mid = pred_fields[mid_idx] + mse_mid = np.mean((pred_mid - truth_mid) ** 2) + mse_final = np.mean((pred_fields[-1] - truth_fields[-1]) ** 2) + + # Baseline (no bispectrum) + pred_mid_baseline = pred_fields_baseline[mid_idx] + mse_mid_baseline = np.mean((pred_mid_baseline - truth_mid) ** 2) + mse_final_baseline = np.mean((pred_fields_baseline[-1] - truth_fields[-1]) ** 2) + + print(' WITH Bispectrum:') + print(f' MSE at t={frame_times[mid_idx]:.2f} (midpoint): {mse_mid:.6e}') + print(f' MSE at t={frame_times[-1]:.2f} (final): {mse_final:.6e}') + print(' 
WITHOUT Bispectrum (baseline):') + print(f' MSE at t={frame_times[mid_idx]:.2f} (midpoint): {mse_mid_baseline:.6e}') + print(f' MSE at t={frame_times[-1]:.2f} (final): {mse_final_baseline:.6e}') + improvement = (mse_final_baseline - mse_final) / mse_final_baseline * 100 + print(f' Bispectrum improvement: {improvement:.1f}% lower MSE') # --- Static comparison plot --- print('\n[9] Saving static comparison plot (sphere)...') - static_path = out_dir / f'comparison_t{substep_times[mid_sub_idx]:.4f}.png' - plot_comparison_sphere(truth_mid, pred_mid, substep_times[mid_sub_idx], static_path) + static_path = out_dir / f'comparison_t{frame_times[mid_idx]:.4f}.png' + plot_comparison_sphere(truth_mid, pred_mid, frame_times[mid_idx], static_path) print(f' Saved: {static_path}') - # --- Animation frames --- - print('\n[10] Saving animation frames...') - save_animation_frames(truth_fields, pred_fields, substep_times, frames_dir) + # --- Animation frames (enhanced with metrics and baseline comparison) --- + print('\n[10] Saving animation frames with metrics...') + save_animation_frames_enhanced( + truth_fields, + pred_fields, + pred_fields_baseline, + frame_times, + frames_dir, + lift_dim, # PCA dimension + lift_dim_full, # Original full dimension + lift_dim_baseline, + var_ratio, # PCA variance explained + LMAX, + ) # --- Create MP4 --- print('\n[11] Creating MP4 animation...')