Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

### Added

- **Folder-path signal in community detection.** Embeddings treat "infrastructure" as one topic whether a note lives under `work/` or `home/`, so distinct organisational areas bled into one cluster. `_build_similarity_matrix` now blends a fourth channel — notes sharing a top-level folder prefix get a uniform similarity bump (`PATH_SIGNAL_WEIGHT`, default 0.3; `PATH_PREFIX_DEPTH`, default 1). Measured on a ~490-note vault: folder purity rose 0.68 → 0.85 (coarse) / 0.76 → 0.93 (fine) with modularity held or slightly improved. Degrades gracefully — root-level files form no path edges and a flat single-folder vault yields an all-zero signal; set the weight to 0 to disable.

### Fixed

- **Community hierarchy check no longer false-alarms on every healthy build.** The build asserted `Q(fine) > Q(coarse)` and logged "sanity check failed" whenever it didn't. But Newman modularity is resolution-dependent and maximised at a single scale, so a finer partition scores *lower* at the implicit γ=1 by construction — the assertion could never hold for a healthy hierarchy. Verified empirically: the fine partition only overtakes coarse at γ≈`BETA_FINE` (≈2.0). Replaced with `_hierarchy_health_warning()`, which checks what issue #33 is actually about — the fine level collapsing into *fewer* communities than coarse — plus a `MIN_HEALTHY_Q` floor for near-random partitions.
Expand Down
32 changes: 31 additions & 1 deletion src/neurostack/attractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,22 @@
# α: semantic similarity (embedding cosine) — structural/content overlap
# β_cooc: co-occurrence weight — Hebbian "used together" signal
# γ: wiki-link weight — explicit human connections
# δ_path: folder-path weight — notes sharing a top-level folder (e.g. work/
# vs home/) get a similarity bump. Embeddings see "infrastructure" as one
# topic regardless of work-vs-personal context; the folder layout carries
# that organisational signal, which otherwise never reaches detection.
ALPHA_SEMANTIC = 0.6
BETA_COOCCURRENCE = 0.25
GAMMA_WIKILINKS = 0.15
PATH_SIGNAL_WEIGHT = 0.3

# Folder depth the path signal groups on. 1 = top-level (work/, home/,
# research/…), the grain that separates work from personal. Deeper grouping
# (e.g. work/proj-a vs work/proj-b) measurably reduced cohesion. Notes with no
# folder (root-level files) form no path edges; a flat single-folder vault
# yields an all-zero signal, so this degrades gracefully. Set the weight to 0
# to disable.
PATH_PREFIX_DEPTH = 1

# ── Inverse temperature for attractor convergence ──
# Low β → broad themes (coarse), high β → narrow sub-themes (fine)
Expand Down Expand Up @@ -194,11 +207,28 @@ def _build_similarity_matrix(
S_links[src, tgt] = 1.0
S_links[tgt, src] = 1.0 # symmetric

# Blend all three signals
# 4. Folder-path signal: notes sharing a top-level folder prefix get a
# uniform similarity bump, carrying the vault's organisational structure
# (work/ vs home/ vs research/…) that embeddings alone don't capture.
S_path = np.zeros((n, n), dtype=np.float32)
if PATH_SIGNAL_WEIGHT > 0:
prefix_groups: dict[str, list[int]] = defaultdict(list)
for i, p in enumerate(note_paths):
prefix_groups["/".join(p.split("/")[:PATH_PREFIX_DEPTH])].append(i)
for members in prefix_groups.values():
# A root-level file (no folder) is its own singleton group and
# contributes no edges, which is what we want.
if len(members) < 2:
continue
mi = np.array(members)
S_path[np.ix_(mi, mi)] = 1.0

# Blend all four signals
S = (
ALPHA_SEMANTIC * S_semantic
+ BETA_COOCCURRENCE * S_cooc
+ GAMMA_WIKILINKS * S_links
+ PATH_SIGNAL_WEIGHT * S_path
)

# Zero out self-similarity (diagonal) — a note shouldn't attract itself
Expand Down
52 changes: 52 additions & 0 deletions tests/test_attractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from neurostack.attractor import (
ALPHA_SEMANTIC,
GAMMA_WIKILINKS,
PATH_SIGNAL_WEIGHT,
TOP_K_COARSE,
TOP_K_FINE,
TOP_K_NEIGHBORS,
Expand Down Expand Up @@ -680,3 +681,54 @@ def test_weak_structure_warns(self):
def test_equal_counts_ok(self):
# n_fine == n_coarse is a valid (non-collapsed) refinement boundary.
assert _hierarchy_health_warning(7, 7, 0.30, 0.20) is None


# ---------------------------------------------------------------------------
# Folder-path signal
# ---------------------------------------------------------------------------

class TestPathSignal:
"""Notes sharing a top-level folder get a uniform similarity bump."""

def test_same_folder_gets_bump_cross_folder_does_not(self, in_memory_db):
conn = in_memory_db
paths = ["work/a.md", "work/b.md", "home/c.md", "home/d.md"]
for p in paths:
_insert_note(conn, p)
conn.commit()
embs = np.eye(4, 16, dtype=np.float32) # orthogonal -> semantic ~0

S = _build_similarity_matrix(conn, paths, embs)

# same top-level folder -> path weight (semantic floor is ~0 here)
assert S[0, 1] == pytest.approx(PATH_SIGNAL_WEIGHT, abs=1e-4)
assert S[2, 3] == pytest.approx(PATH_SIGNAL_WEIGHT, abs=1e-4)
# different folder -> no path edge
assert S[0, 2] == pytest.approx(0.0, abs=1e-4)

def test_root_level_file_forms_no_path_edge(self, in_memory_db):
conn = in_memory_db
paths = ["home/a.md", "home/b.md", "top.md"]
for p in paths:
_insert_note(conn, p)
conn.commit()
embs = np.eye(3, 16, dtype=np.float32)

S = _build_similarity_matrix(conn, paths, embs)

assert S[0, 1] == pytest.approx(PATH_SIGNAL_WEIGHT, abs=1e-4) # home pair
assert S[0, 2] == pytest.approx(0.0, abs=1e-4) # root file, no group
assert S[1, 2] == pytest.approx(0.0, abs=1e-4)

def test_weight_zero_disables(self, in_memory_db):
conn = in_memory_db
paths = ["work/a.md", "work/b.md", "work/c.md"]
for p in paths:
_insert_note(conn, p)
conn.commit()
embs = np.eye(3, 16, dtype=np.float32)

from neurostack import attractor
with patch.object(attractor, "PATH_SIGNAL_WEIGHT", 0.0):
S = _build_similarity_matrix(conn, paths, embs)
assert S[0, 1] == pytest.approx(0.0, abs=1e-4) # same folder but off
Loading