Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,11 @@ Open the search modal with either the **Alt+K** keyboard shortcut or the toolbar

Type a query and results appear ranked by similarity. Click a result to navigate to that block. **Shift+click** (or **Shift+Enter**) opens the block in the right sidebar instead.

Hover over a result to reveal the **copy** button, or press **Ctrl+C** (**Cmd+C** on Mac) with a result selected, to copy a `((block reference))` to the clipboard.
When a query matches several blocks on the same page, they are collapsed into a **page group** showing the best match score and the number of matching blocks. Click the group header to expand or collapse its blocks. Short queries group more aggressively; longer, more specific queries tend to show individual blocks. Journal pages are never grouped.

The **Include journal** checkbox in the footer controls whether results from journal pages are shown.
Hover over a result to reveal the **copy** button, or press **Ctrl+C** (**Cmd+C** on Mac) with a result selected, to copy a `((block reference))` to the clipboard. On a page group, this copies a `[[page reference]]` instead.

Results from journal pages are gently down-ranked based on age so that recent entries surface above older ones. The **Include journal** checkbox in the footer controls whether journal results are shown at all.

Blocks are automatically indexed when the graph loads, and only changed blocks are re-embedded on subsequent runs. To rebuild the index from scratch, use the **Semantic Search: Rebuild index** command from the command palette (Ctrl/Cmd+Shift+P).

Expand Down
Binary file modified docs/demo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
200 changes: 200 additions & 0 deletions src/__tests__/ranking.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import { describe, it, expect } from "vitest";
import {
computeDecayMultiplier,
applyTimeDecay,
computeGroupingParams,
groupAndRank,
getOverfetchCount,
DECAY_FLOOR,
T_MIN,
T_MAX,
W_MAX,
W_MIN,
type ScoredResult,
} from "../ranking";
import type { SearchResult } from "../search";

// getOverfetchCount: the over-fetch size is a fixed multiple of the requested topK.
describe("getOverfetchCount", () => {
  it("returns 2x topK", () => {
    // [requested topK, expected over-fetch count]
    const cases: Array<[number, number]> = [
      [10, 20],
      [1, 2],
    ];
    cases.forEach(([topK, expected]) => {
      expect(getOverfetchCount(topK)).toBe(expected);
    });
  });
});

// computeDecayMultiplier: exponential age decay from 1.0 down towards DECAY_FLOOR.
describe("computeDecayMultiplier", () => {
  // Fixed reference time so the expectations never depend on the wall clock.
  const now = new Date(2025, 5, 15); // June 15, 2025 (Date months are zero-based)

  it("returns 1.0 for null journalDay", () => {
    expect(computeDecayMultiplier(null, now)).toBe(1.0);
  });

  it("returns ~1.0 for today", () => {
    expect(computeDecayMultiplier(20250615, now)).toBeCloseTo(1.0, 2);
  });

  // Title previously said "~0.906", contradicting the formula and assertion below.
  it("returns ~0.905 for 180 days ago", () => {
    // Dec 17, 2024 is exactly 180 days before June 15, 2025.
    const journalDay = 20241217;
    const result = computeDecayMultiplier(journalDay, now);
    // F + (1-F)*e^(-1) = 0.85 + 0.15*0.368 = 0.905
    expect(result).toBeCloseTo(0.905, 2);
  });

  it("converges to DECAY_FLOOR for very old entries", () => {
    const result = computeDecayMultiplier(20100101, now);
    expect(result).toBeCloseTo(DECAY_FLOOR, 2);
  });

  it("clamps future dates to 1.0", () => {
    // A journal day after `now` has a negative age, which is clamped to zero.
    const result = computeDecayMultiplier(20260101, now);
    expect(result).toBeCloseTo(1.0, 2);
  });
});

// computeGroupingParams: threshold and density weight are interpolated by query length.
describe("computeGroupingParams", () => {
  it("returns min threshold and max density for short queries (<=3 words)", () => {
    [1, 2, 3].forEach((wordCount) => {
      const params = computeGroupingParams(wordCount);
      expect(params.threshold).toBe(T_MIN);
      expect(params.densityWeight).toBeCloseTo(W_MAX);
    });
  });

  it("returns max threshold and min density for long queries (>=8 words)", () => {
    [8, 12].forEach((wordCount) => {
      const params = computeGroupingParams(wordCount);
      expect(params.threshold).toBe(T_MAX);
      expect(params.densityWeight).toBeCloseTo(W_MIN);
    });
  });

  it("interpolates for mid-length queries", () => {
    // 5 words sits (5-3)/(8-3) = 0.4 of the way from the short to the long boundary.
    const ratio = 0.4;
    const expectedThreshold = Math.round(T_MIN + (T_MAX - T_MIN) * ratio);
    const expectedWeight = W_MAX + (W_MIN - W_MAX) * ratio;

    const params = computeGroupingParams(5);
    expect(params.threshold).toBe(expectedThreshold);
    expect(params.densityWeight).toBeCloseTo(expectedWeight);
  });
});

// applyTimeDecay: each result is scaled by the decay multiplier of its page.
describe("applyTimeDecay", () => {
  it("applies decay multipliers from journal day map", () => {
    const now = new Date(2025, 5, 15);

    const journalDays = new Map<number, number | null>();
    journalDays.set(1, null); // not a journal
    journalDays.set(2, 20250615); // today's journal

    const results: SearchResult[] = [
      { blockId: "a", pageId: 1, similarity: 0.9 },
      { blockId: "b", pageId: 2, similarity: 0.8 },
    ];

    const [plain, journal] = applyTimeDecay(results, journalDays, now);

    // Non-journal page: score passes through untouched.
    expect(plain.decayMultiplier).toBe(1.0);
    expect(plain.adjustedScore).toBe(0.9);

    // Journal page dated "now": effectively no decay yet.
    expect(journal.decayMultiplier).toBeCloseTo(1.0, 2);
    expect(journal.adjustedScore).toBeCloseTo(0.8, 2);
  });

  it("treats missing pageId as non-journal", () => {
    const orphan: SearchResult = { blockId: "a", pageId: 99, similarity: 0.9 };
    const [only] = applyTimeDecay([orphan], new Map());
    expect(only.decayMultiplier).toBe(1.0);
  });
});

// groupAndRank: collapses dense pages into groups, ranks everything, trims to topK.
describe("groupAndRank", () => {
  /** Builds a ScoredResult with no decay applied (similarity === adjustedScore). */
  function makeScoredResult(
    blockId: string,
    pageId: number,
    adjustedScore: number,
  ): ScoredResult {
    return {
      blockId,
      pageId,
      similarity: adjustedScore,
      decayMultiplier: 1.0,
      adjustedScore,
    };
  }

  it("groups pages meeting threshold into PageGroup", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.9),
      makeScoredResult("a2", 1, 0.8),
      makeScoredResult("a3", 1, 0.7),
      makeScoredResult("a4", 1, 0.6),
      makeScoredResult("b1", 2, 0.85),
    ];
    // queryWordCount=1 → threshold=T_MIN, densityWeight=W_MAX
    const items = groupAndRank(scored, 1, 10, new Set());
    expect(items[0].kind).toBe("page-group");
    if (items[0].kind === "page-group") {
      expect(items[0].pageId).toBe(1);
      // pageScore = best + sum(score_i * W_MAX / i) for the i-th runner-up (harmonic decay).
      // W_MAX is used instead of a literal so the expectation follows the constant.
      expect(items[0].pageScore).toBeCloseTo(
        0.9 + 0.8 * W_MAX + (0.7 * W_MAX) / 2 + (0.6 * W_MAX) / 3,
      );
      expect(items[0].blocks).toHaveLength(4);
    }
    expect(items[1].kind).toBe("single-block");
  });

  it("does not group pages below threshold", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.9),
      makeScoredResult("b1", 2, 0.85),
    ];
    // threshold=T_MIN, page 1 has only 1 block
    const items = groupAndRank(scored, 1, 10, new Set());
    expect(items.every((i) => i.kind === "single-block")).toBe(true);
  });

  it("never groups journal pages", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.9),
      makeScoredResult("a2", 1, 0.8),
      makeScoredResult("a3", 1, 0.7),
      makeScoredResult("a4", 1, 0.6),
    ];
    const journalPageIds = new Set([1]);
    const items = groupAndRank(scored, 1, 10, journalPageIds);
    expect(items.every((i) => i.kind === "single-block")).toBe(true);
  });

  it("respects topK limit", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.9),
      makeScoredResult("b1", 2, 0.85),
      makeScoredResult("c1", 3, 0.80),
      makeScoredResult("d1", 4, 0.75),
    ];
    const items = groupAndRank(scored, 1, 2, new Set());
    expect(items).toHaveLength(2);
  });

  it("sorts by score descending", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.5),
      makeScoredResult("b1", 2, 0.9),
    ];
    const items = groupAndRank(scored, 1, 10, new Set());
    // Structural matchers report the offending field on failure, unlike a
    // combined boolean that can only say "expected false to be true".
    expect(items[0]).toMatchObject({ kind: "single-block", result: { blockId: "b1" } });
    expect(items[1]).toMatchObject({ kind: "single-block", result: { blockId: "a1" } });
  });

  it("handles empty input", () => {
    const items = groupAndRank([], 1, 10, new Set());
    expect(items).toHaveLength(0);
  });

  it("handles all results from one page", () => {
    const scored = [
      makeScoredResult("a1", 1, 0.9),
      makeScoredResult("a2", 1, 0.8),
      makeScoredResult("a3", 1, 0.7),
      makeScoredResult("a4", 1, 0.6),
    ];
    const items = groupAndRank(scored, 1, 10, new Set());
    expect(items).toHaveLength(1);
    expect(items[0].kind).toBe("page-group");
  });
});
134 changes: 134 additions & 0 deletions src/ranking.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import type { SearchResult } from "./search";

/**
 * A search hit annotated with its time-decay adjustment.
 * Produced by `applyTimeDecay` and consumed by `groupAndRank`.
 */
export interface ScoredResult {
/** Identifier of the matched block, copied from the search result. */
blockId: string;
/** Identifier of the page that contains the block; used as the grouping key. */
pageId: number;
/** Raw similarity reported by the search, before any decay. */
similarity: number;
/** Multiplier returned by `computeDecayMultiplier` for the block's page. */
decayMultiplier: number;
/** `similarity * decayMultiplier`; the value results are ranked by. */
adjustedScore: number;
}

/**
 * Several matching blocks of one page collapsed into a single ranked entry.
 */
export interface PageGroup {
/** Discriminant of the `RankedItem` union. */
kind: "page-group";
/** Page shared by every block in `blocks`. */
pageId: number;
/** Best block score plus a density bonus contributed by the remaining blocks. */
pageScore: number;
/** The page's matching blocks, ordered by `adjustedScore` descending. */
blocks: ScoredResult[];
}

/**
 * A block ranked on its own rather than as part of a page group.
 */
export interface SingleBlock {
/** Discriminant of the `RankedItem` union. */
kind: "single-block";
/** The block and its scores; ranked by `result.adjustedScore`. */
result: ScoredResult;
}

/** One entry of the ranked list returned by `groupAndRank`; discriminate on `kind`. */
export type RankedItem = PageGroup | SingleBlock;

/** Lower bound the decay multiplier converges to as a journal entry ages. */
export const DECAY_FLOOR = 0.85;
/** Age in days at which the decaying part of the multiplier has dropped to 1/e. */
export const DECAY_SCALE_DAYS = 180;
/** Blocks a page needs before it is grouped, for short queries (interpolation start). */
export const T_MIN = 4;
/** Blocks a page needs before it is grouped, for long queries (interpolation end). */
export const T_MAX = 6;
/** Density-bonus weight for short queries (interpolation start). */
export const W_MAX = 0.15;
/** Density-bonus weight for long queries (interpolation end). */
export const W_MIN = 0.03;
/** Word count at or below which a query gets the short-query parameters. */
export const QUERY_SHORT = 3;
/** Word count at or above which a query gets the long-query parameters. */
export const QUERY_LONG = 8;
/** Factor `getOverfetchCount` applies to the requested topK. */
export const OVERFETCH_MULTIPLIER = 2;

/**
 * Scales the requested result count by `OVERFETCH_MULTIPLIER`.
 *
 * NOTE(review): presumably callers fetch this many raw hits so that grouping
 * still leaves `topK` entries — confirm against the call site.
 *
 * @param topK Number of ranked entries the caller ultimately wants.
 * @returns `topK` multiplied by `OVERFETCH_MULTIPLIER`.
 */
export function getOverfetchCount(topK: number): number {
  const overfetched = OVERFETCH_MULTIPLIER * topK;
  return overfetched;
}

/**
 * Computes the age-based score multiplier for a result on a journal page.
 *
 * The multiplier is `DECAY_FLOOR + (1 - DECAY_FLOOR) * e^(-ageDays / DECAY_SCALE_DAYS)`:
 * 1.0 for an entry dated `now` (or later) and approaching `DECAY_FLOOR` as the
 * entry gets older.
 *
 * @param journalDay Journal date encoded as the integer `YYYYMMDD`, or `null`
 *   when the page is not a journal.
 * @param now Reference time the age is measured against; defaults to the current time.
 * @returns A multiplier in `[DECAY_FLOOR, 1]`. Returns 1.0 (no decay) for `null`,
 *   for a `journalDay` that is not a well-formed `YYYYMMDD` integer, and for an
 *   invalid `now`, so a bad value can never turn a score into `NaN`.
 */
export function computeDecayMultiplier(
  journalDay: number | null,
  now: Date = new Date(),
): number {
  if (journalDay === null) return 1.0;

  // Anything that is not an 8-digit integer cannot be split into YYYY/MM/DD.
  // Without this guard the parse yields NaN, which propagates into
  // adjustedScore and makes downstream sort comparators inconsistent.
  if (!Number.isInteger(journalDay) || journalDay < 10000101 || journalDay > 99991231) {
    return 1.0;
  }

  const year = Math.floor(journalDay / 10000);
  const month = Math.floor(journalDay / 100) % 100;
  const day = journalDay % 100;
  if (month < 1 || month > 12 || day < 1 || day > 31) return 1.0;

  // Local midnight of the journal day; Date months are zero-based.
  const journalDate = new Date(year, month - 1, day);
  const deltaMs = now.getTime() - journalDate.getTime();
  // An invalid `now` yields NaN here; treat it like "no decay" as well.
  if (!Number.isFinite(deltaMs)) return 1.0;

  const msPerDay = 1000 * 60 * 60 * 24;
  // Future-dated entries get age 0 rather than a multiplier above 1.
  const deltaDays = Math.max(0, deltaMs / msPerDay);
  return DECAY_FLOOR + (1 - DECAY_FLOOR) * Math.exp(-deltaDays / DECAY_SCALE_DAYS);
}

/**
 * Annotates every search result with the decay multiplier of its page and the
 * resulting adjusted score.
 *
 * @param results Raw search hits; the array and its items are not modified.
 * @param journalDays Maps a page id to its `YYYYMMDD` journal day, or to `null`
 *   for non-journal pages. Pages absent from the map are treated like `null`.
 * @param now Reference time handed to `computeDecayMultiplier`; defaults to the current time.
 * @returns One `ScoredResult` per input result, in the same order.
 */
export function applyTimeDecay(
  results: SearchResult[],
  journalDays: Map<number, number | null>,
  now: Date = new Date(),
): ScoredResult[] {
  return results.map(({ blockId, pageId, similarity }) => {
    const mappedDay = journalDays.get(pageId);
    // A page missing from the map is handled exactly like a non-journal page.
    const journalDay = mappedDay === undefined ? null : mappedDay;
    const decayMultiplier = computeDecayMultiplier(journalDay, now);
    const adjustedScore = similarity * decayMultiplier;
    return { blockId, pageId, similarity, decayMultiplier, adjustedScore };
  });
}

/**
 * Derives the page-grouping parameters from the length of the query.
 *
 * Queries of `QUERY_SHORT` words or fewer get `T_MIN` / `W_MAX`; queries of
 * `QUERY_LONG` words or more get `T_MAX` / `W_MIN`; lengths in between are
 * interpolated linearly (the threshold is rounded to an integer).
 *
 * @param queryWordCount Number of words in the search query.
 * @returns `threshold` — minimum matching blocks a page needs to be grouped;
 *   `densityWeight` — weight of the density bonus in a group's score.
 */
export function computeGroupingParams(queryWordCount: number): {
  threshold: number;
  densityWeight: number;
} {
  // Position of the query between the short and long boundaries, clamped to [0, 1].
  const position = (queryWordCount - QUERY_SHORT) / (QUERY_LONG - QUERY_SHORT);
  const ratio = Math.min(1, Math.max(0, position));

  return {
    threshold: Math.round(T_MIN + (T_MAX - T_MIN) * ratio),
    densityWeight: W_MAX + (W_MIN - W_MAX) * ratio,
  };
}

/**
 * Turns decay-adjusted results into the final ranked list.
 *
 * Results are bucketed by page. A non-journal page with at least `threshold`
 * matching blocks becomes one `page-group` whose score is its best block score
 * plus a density bonus: the i-th runner-up contributes
 * `adjustedScore * densityWeight / i`. Every other block is emitted as a
 * `single-block`. All entries are then sorted by score, highest first, and cut
 * to `topK`.
 *
 * @param scored Decay-adjusted results; the array itself is not reordered.
 * @param queryWordCount Word count of the query, passed to `computeGroupingParams`.
 * @param topK Maximum number of entries to return (a group counts as one entry).
 * @param journalPageIds Pages that must never be grouped.
 * @returns At most `topK` ranked entries, best first.
 */
export function groupAndRank(
  scored: ScoredResult[],
  queryWordCount: number,
  topK: number,
  journalPageIds: Set<number>,
): RankedItem[] {
  const { threshold, densityWeight } = computeGroupingParams(queryWordCount);

  // Bucket results per page; Map iteration keeps first-seen page order.
  const blocksByPage = new Map<number, ScoredResult[]>();
  for (const entry of scored) {
    const bucket = blocksByPage.get(entry.pageId);
    if (bucket === undefined) {
      blocksByPage.set(entry.pageId, [entry]);
    } else {
      bucket.push(entry);
    }
  }

  // Score an entry is ranked by, regardless of its kind.
  const rankScore = (item: RankedItem): number =>
    item.kind === "page-group" ? item.pageScore : item.result.adjustedScore;

  const ranked: RankedItem[] = [];
  blocksByPage.forEach((pageBlocks, pageId) => {
    // Best block first, so index 0 is the page's top score.
    pageBlocks.sort((x, y) => y.adjustedScore - x.adjustedScore);

    const canGroup = !journalPageIds.has(pageId) && pageBlocks.length >= threshold;
    if (!canGroup) {
      for (const block of pageBlocks) {
        ranked.push({ kind: "single-block", result: block });
      }
      return;
    }

    const [best, ...runnersUp] = pageBlocks;
    // Harmonic weighting: each further block adds less than the one before it.
    const densityBonus = runnersUp.reduce(
      (sum, block, idx) => sum + (block.adjustedScore * densityWeight) / (idx + 1),
      0,
    );
    ranked.push({
      kind: "page-group",
      pageId,
      pageScore: best.adjustedScore + densityBonus,
      blocks: pageBlocks,
    });
  });

  ranked.sort((a, b) => rankScore(b) - rankScore(a));
  return ranked.slice(0, topK);
}
Loading