raphasouthall · raphasouthall · Apr 16, 2026 · Apr 16, 2026
diff --git a/src/neurostack/attractor.py b/src/neurostack/attractor.py
@@ -60,6 +60,12 @@
 # convergence happens in fewer iterations.
 TOP_K_NEIGHBORS = 50
 
+# Per-level top-K. Coarse uses a wider neighbourhood so broad basins form;
+# fine uses a narrower one so high-β softmax doesn't funnel every note into
+# a handful of hub attractors (see issue #33 for the inversion this fixes).
+TOP_K_COARSE = 80
+TOP_K_FINE = 20
+
 # Minimum shared entities for a note-note edge (co-occurrence signal)
 MIN_SHARED = 2
 
@@ -217,6 +223,7 @@ def _attractor_convergence(
     beta: float,
     max_iter: int | None = None,
     threshold: float = CONVERGENCE_THRESHOLD,
+    top_k: int | None = None,
 ) -> np.ndarray:
     """Run Hopfield-style attractor dynamics on the similarity matrix.
 
@@ -226,16 +233,21 @@ def _attractor_convergence(
 
     state_i(t+1) = softmax(β · S_i · state(t))
 
-    For large matrices (n > TOP_K_NEIGHBORS), S is sparsified to keep only
-    the top-k neighbors per row, and iterations are scaled adaptively.
+    For large matrices (n > top_k), S is sparsified to keep only the top-k
+    neighbors per row, and iterations are scaled adaptively. Callers can
+    override top_k per level — coarse/fine partitions need different
+    neighbourhood widths to avoid hub monopolisation at high β (issue #33).
 
     Returns the converged state matrix (n × n).
     """
     n = S.shape[0]
 
+    if top_k is None:
+        top_k = TOP_K_NEIGHBORS
+
     # Sparsify for large matrices — keeps convergence fast
-    if n > TOP_K_NEIGHBORS:
-        S = _sparsify_top_k(S, TOP_K_NEIGHBORS)
+    if n > top_k:
+        S = _sparsify_top_k(S, top_k)
 
     if max_iter is None:
         max_iter = _adaptive_max_iter(n)
@@ -273,15 +285,18 @@ def _attractor_convergence(
 def _assign_communities(
     state: np.ndarray,
     note_paths: list[str],
+    merge_singletons: bool = True,
 ) -> dict[int, list[str]]:
     """Assign notes to communities based on converged attractor states.
 
     Notes that converge to similar states belong to the same community.
     We assign each note to the index of its dominant attractor (argmax of
     its state vector), then group by attractor.
 
-    Applies lateral inhibition: singleton communities (only 1 note) are
-    merged into the nearest non-singleton community.
+    When merge_singletons is True (default), lateral inhibition folds every
+    1-note basin into the nearest non-singleton community. At high β this
+    wipes out the narrow basins that a fine partition is supposed to
+    expose — pass False at β_FINE so singletons survive (issue #33).
     """
     n = len(note_paths)
 
@@ -293,6 +308,9 @@ def _assign_communities(
     for i in range(n):
         raw_communities[int(assignments[i])].append(note_paths[i])
 
+    if not merge_singletons:
+        return {i: notes for i, notes in enumerate(raw_communities.values())}
+
     # Lateral inhibition: merge singletons into nearest non-singleton
     non_singletons = {
         k: v for k, v in raw_communities.items() if len(v) > 1
@@ -325,6 +343,53 @@ def _assign_communities(
     return {i: notes for i, notes in enumerate(communities.values())}
 
 
+def _modularity(
+    S: np.ndarray,
+    note_paths: list[str],
+    communities: dict[int, list[str]],
+) -> float:
+    """Weighted Newman modularity Q of a partition on similarity matrix S.
+
+        Q = (1 / 2m) Σ_ij [A_ij - k_i·k_j / 2m] · δ(c_i, c_j)
+
+    Uses the pre-sparsification S so the metric reflects the partition's
+    fit to the full similarity structure, not the truncated neighbourhood.
+    Returns 0.0 for degenerate matrices (zero total weight).
+    """
+    n = S.shape[0]
+    # Ensure symmetry: modularity is defined on an undirected weighted graph
+    A = (S + S.T) / 2.0
+    two_m = float(A.sum())
+    if two_m <= 0.0 or n == 0:
+        return 0.0
+
+    k = A.sum(axis=1)  # strength (weighted degree) per node
+    path_to_idx = {p: i for i, p in enumerate(note_paths)}
+
+    q = 0.0
+    for members in communities.values():
+        idx = np.array(
+            [path_to_idx[p] for p in members if p in path_to_idx],
+            dtype=np.int64,
+        )
+        if idx.size == 0:
+            continue
+        # Sum of A within the community block minus expected under null model
+        block = A[np.ix_(idx, idx)]
+        k_block = k[idx]
+        q += float(block.sum()) - float(k_block.sum() ** 2) / two_m
+
+    return q / two_m
+
+
+def _size_stats(communities: dict[int, list[str]]) -> tuple[int, int, int, float]:
+    """Return (count, min_size, max_size, mean_size) for a partition."""
+    sizes = [len(v) for v in communities.values()]
+    if not sizes:
+        return 0, 0, 0, 0.0
+    return len(sizes), min(sizes), max(sizes), sum(sizes) / len(sizes)
+
+
 def _store_communities(
     conn: sqlite3.Connection,
     level: int,
@@ -349,6 +414,23 @@ def _store_communities(
         )
 
 
+def _store_level_stats(
+    conn: sqlite3.Connection,
+    level: int,
+    communities: dict[int, list[str]],
+    modularity: float,
+) -> None:
+    """Persist per-level aggregate stats (size distribution + modularity)."""
+    count, min_size, max_size, mean_size = _size_stats(communities)
+    now = datetime.now(timezone.utc).isoformat()
+    conn.execute(
+        "INSERT OR REPLACE INTO community_level_stats"
+        " (level, n_communities, min_size, max_size, mean_size,"
+        "  modularity, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?)",
+        (level, count, min_size, max_size, mean_size, modularity, now),
+    )
+
+
 def detect_communities(
     conn: sqlite3.Connection | None = None,
     db_path=None,
@@ -374,6 +456,7 @@ def detect_communities(
     # Clear existing
     conn.execute("DELETE FROM community_members")
     conn.execute("DELETE FROM communities")
+    conn.execute("DELETE FROM community_level_stats")
     conn.commit()
 
     # Persist entity co-occurrence weights from triples
@@ -417,26 +500,48 @@ def detect_communities(
     # ── Coarse communities (low β → broad basins) ──
     log.info(
         f"Running attractor convergence level 0"
-        f" (coarse, β={BETA_COARSE})..."
+        f" (coarse, β={BETA_COARSE}, top_k={TOP_K_COARSE})..."
+    )
+    state_coarse = _attractor_convergence(
+        S, beta=BETA_COARSE, top_k=TOP_K_COARSE,
     )
-    state_coarse = _attractor_convergence(S, beta=BETA_COARSE)
     communities_coarse = _assign_communities(state_coarse, note_paths)
     _store_communities(conn, level=0, communities=communities_coarse)
+    q_coarse = _modularity(S, note_paths, communities_coarse)
+    _store_level_stats(conn, 0, communities_coarse, q_coarse)
     n_coarse = len(communities_coarse)
 
     # ── Fine communities (high β → narrow basins) ──
+    # Narrower top_k keeps softmax basins narrow; merge_singletons=False
+    # preserves the 1-note basins that are the whole point at β=2.0
+    # (issue #33).
     log.info(
         f"Running attractor convergence level 1"
-        f" (fine, β={BETA_FINE})..."
+        f" (fine, β={BETA_FINE}, top_k={TOP_K_FINE})..."
+    )
+    state_fine = _attractor_convergence(
+        S, beta=BETA_FINE, top_k=TOP_K_FINE,
+    )
+    communities_fine = _assign_communities(
+        state_fine, note_paths, merge_singletons=False,
     )
-    state_fine = _attractor_convergence(S, beta=BETA_FINE)
-    communities_fine = _assign_communities(state_fine, note_paths)
     _store_communities(conn, level=1, communities=communities_fine)
+    q_fine = _modularity(S, note_paths, communities_fine)
+    _store_level_stats(conn, 1, communities_fine, q_fine)
     n_fine = len(communities_fine)
 
+    if q_fine <= q_coarse:
+        log.warning(
+            "Community hierarchy sanity check failed:"
+            f" Q(fine)={q_fine:.4f} <= Q(coarse)={q_coarse:.4f}."
+            " The fine partition is not a tighter fit than coarse —"
+            " expect n_fine > n_coarse and Q(fine) > Q(coarse)."
+        )
+
     conn.commit()
     log.info(
         f"Community detection done:"
-        f" {n_coarse} coarse, {n_fine} fine communities."
+        f" {n_coarse} coarse (Q={q_coarse:.4f}),"
+        f" {n_fine} fine (Q={q_fine:.4f}) communities."
     )
     return n_coarse, n_fine
diff --git a/src/neurostack/schema.py b/src/neurostack/schema.py
@@ -32,7 +32,7 @@ def __getattr__(name: str):
         return _db_path()
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
-SCHEMA_VERSION = 13
+SCHEMA_VERSION = 14
 
 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -162,6 +162,19 @@ def __getattr__(name: str):
 
 CREATE INDEX IF NOT EXISTS idx_community_members_entity ON community_members(entity);
 
+-- Per-level community partition statistics (size distribution + modularity).
+-- One row per level — used by vault_stats to surface partition quality and
+-- to detect an inverted hierarchy (issue #33).
+CREATE TABLE IF NOT EXISTS community_level_stats (
+    level INTEGER PRIMARY KEY,
+    n_communities INTEGER,
+    min_size INTEGER,
+    max_size INTEGER,
+    mean_size REAL,
+    modularity REAL,
+    updated_at TEXT
+);
+
 -- Folder-level aggregate summaries for semantic context= boosting
 CREATE TABLE IF NOT EXISTS folder_summaries (
     folder_path TEXT PRIMARY KEY,
@@ -522,6 +535,20 @@ def __getattr__(name: str):
 MIGRATION_V13 = "-- vec index tables created by vecindex.ensure_vec_tables()"
 
 
+# Migration from v13 to v14: add community_level_stats table (issue #33)
+MIGRATION_V14 = """
+CREATE TABLE IF NOT EXISTS community_level_stats (
+    level INTEGER PRIMARY KEY,
+    n_communities INTEGER,
+    min_size INTEGER,
+    max_size INTEGER,
+    mean_size REAL,
+    modularity REAL,
+    updated_at TEXT
+);
+"""
+
+
 def _run_migrations(conn: sqlite3.Connection):
     """Run schema migrations if needed."""
     row = conn.execute("SELECT MAX(version) as v FROM schema_version").fetchone()
@@ -750,6 +777,19 @@ def _run_migrations(conn: sqlite3.Connection):
         # Vec tables are created by _init_vec_index() after migrations
         log.info("Migration to v13 complete.")
 
+    if current < 14:
+        log.info(
+            "Migrating schema v13 -> v14: "
+            "adding community_level_stats table..."
+        )
+        conn.executescript(MIGRATION_V14)
+        conn.execute(
+            "INSERT OR REPLACE INTO schema_version"
+            " VALUES (14)"
+        )
+        conn.commit()
+        log.info("Migration to v14 complete.")
+
 
 def get_db(db_path: Path | None = None) -> sqlite3.Connection:
     """Get a database connection, creating schema if needed."""

diff --git a/src/neurostack/tools/search_tools.py b/src/neurostack/tools/search_tools.py
@@ -22,6 +22,44 @@ def _cfg():
     return cfg.vault_root, cfg.embed_url
 
 
+def _community_level_stats(conn) -> list[dict]:
+    """Return per-level community partition stats (size distribution + modularity).
+
+    Reads community_level_stats populated by attractor.detect_communities.
+    Empty list if the table doesn't exist yet or no levels have been written.
+    """
+    try:
+        rows = conn.execute(
+            "SELECT level, n_communities, min_size, max_size, mean_size,"
+            " modularity FROM community_level_stats ORDER BY level"
+        ).fetchall()
+    except Exception:
+        return []
+    def _label(level: int) -> str:
+        if level == 0:
+            return "coarse"
+        if level == 1:
+            return "fine"
+        return f"level{level}"
+
+    return [
+        {
+            "level": r["level"],
+            "label": _label(r["level"]),
+            "n_communities": r["n_communities"],
+            "min_size": r["min_size"],
+            "max_size": r["max_size"],
+            "mean_size": (
+                round(r["mean_size"], 2) if r["mean_size"] is not None else None
+            ),
+            "modularity": (
+                round(r["modularity"], 4) if r["modularity"] is not None else None
+            ),
+        }
+        for r in rows
+    ]
+
+
 def _search_memories_for_results(query: str, workspace: str = None, limit: int = 3) -> list[dict]:
     """Search memories and return compact results for inclusion in vault_search."""
     try:
@@ -400,6 +438,7 @@ def vault_stats() -> dict:
         "communities_summarized": conn.execute(
             "SELECT COUNT(*) as c FROM communities WHERE summary IS NOT NULL"
         ).fetchone()["c"],
+        "community_levels": _community_level_stats(conn),
         "excitability": {
             "active": dormancy["active_count"],
             "dormant": dormancy["dormant_count"],