Skip to content
Open
14 changes: 12 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions scripts/ci-test-manifest.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export const CI_TEST_MANIFEST = [
{ group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" },
{ group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" },
{ group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] },
{ group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] },
Expand Down
1 change: 1 addition & 0 deletions scripts/verify-ci-test-manifest.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const EXPECTED_BASELINE = [
{ group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" },
{ group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" },
{ group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" },
{ group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] },
{ group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] },
Expand Down
142 changes: 107 additions & 35 deletions src/smart-extractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,9 @@ export class SmartExtractor {
let survivingCandidates = capped;
try {
const abstracts = capped.map((c) => c.abstract);
const vectors = await Promise.all(
abstracts.map((a) => this.embedder.embed(a).catch(() => [] as number[])),
);
const dedupResult = batchDedup(abstracts, vectors);
const vectors = await this.embedder.embedBatch(abstracts);
const safeVectors = vectors.map((v) => v || []);
const dedupResult = batchDedup(abstracts, safeVectors);
if (dedupResult.duplicateIndices.length > 0) {
survivingCandidates = dedupResult.survivingIndices.map((i) => capped[i]);
stats.skipped += dedupResult.duplicateIndices.length;
Expand All @@ -304,26 +303,61 @@ export class SmartExtractor {
);
}

// Step 2: Process each surviving candidate through dedup pipeline
for (const candidate of survivingCandidates) {
// Step 2: Process each surviving candidate through dedup pipeline.
//
// Optimization: filter boundary-excluded candidates BEFORE batch embedding
// to avoid wasting embed API calls on candidates that will be skipped.
// See MR1 from code review.
const processableCandidates: { index: number; candidate: CandidateMemory }[] = [];
for (let i = 0; i < survivingCandidates.length; i++) {
const c = survivingCandidates[i];
if (
isUserMdExclusiveMemory(
{
memoryCategory: candidate.category,
abstract: candidate.abstract,
content: candidate.content,
memoryCategory: c.category,
abstract: c.abstract,
content: c.content,
},
this.config.workspaceBoundary,
)
) {
stats.skipped += 1;
stats.boundarySkipped = (stats.boundarySkipped ?? 0) + 1;
this.log(
`memory-pro: smart-extractor: skipped USER.md-exclusive [${candidate.category}] ${candidate.abstract.slice(0, 60)}`,
`memory-pro: smart-extractor: skipped USER.md-exclusive [${c.category}] ${c.abstract.slice(0, 60)}`,
);
continue;
}
processableCandidates.push({ index: i, candidate: c });
}

// Pre-compute vectors for processable non-profile candidates in a single batch API call
// to reduce embedding round-trips from N to 1.
const precomputedVectors = new Map<number, number[]>();
const nonProfileToEmbed: { index: number; text: string }[] = [];
for (const { index, candidate } of processableCandidates) {
if (!ALWAYS_MERGE_CATEGORIES.has(candidate.category)) {
nonProfileToEmbed.push({ index, text: `${candidate.abstract} ${candidate.content}` });
}
}
if (nonProfileToEmbed.length > 0) {
try {
const batchTexts = nonProfileToEmbed.map((e) => e.text);
const batchVectors = await this.embedder.embedBatch(batchTexts);
for (let j = 0; j < nonProfileToEmbed.length; j++) {
const vec = batchVectors[j];
if (vec && vec.length > 0) {
precomputedVectors.set(nonProfileToEmbed[j].index, vec);
}
}
} catch (err) {
this.log(
`memory-pro: smart-extractor: batch pre-embed failed, will embed individually: ${String(err)}`,
);
}
}

for (const { index, candidate } of processableCandidates) {
try {
await this.processCandidate(
candidate,
Expand All @@ -332,6 +366,7 @@ export class SmartExtractor {
stats,
targetScope,
scopeFilter,
precomputedVectors.get(index),
);
} catch (err) {
this.log(
Expand All @@ -351,38 +386,70 @@ export class SmartExtractor {
* Filter out texts that match noise prototypes by embedding similarity.
* Very short texts (<=8 chars) and long texts (>300 chars) are passed through without checking.
* Only active when noiseBank is configured and initialized.
*
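* Uses batch embedding to reduce API round-trips from N to 1.
* If the batch call fails, all texts are passed through unfiltered; a text
* whose vector is missing or empty is likewise kept.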
*/
async filterNoiseByEmbedding(texts: string[]): Promise<string[]> {
const noiseBank = this.config.noiseBank;
if (!noiseBank || !noiseBank.initialized) return texts;

const result: string[] = [];
for (const text of texts) {
// Very short texts lack semantic signal — skip noise check to avoid false positives
if (text.length <= 8) {
result.push(text);
continue;
}
// Long texts are unlikely to be pure noise queries
if (text.length > 300) {
result.push(text);
continue;
// Partition: short/long texts bypass noise check; mid-length need embedding
const SHORT_THRESHOLD = 8;
const LONG_THRESHOLD = 300;
const bypassFlags: boolean[] = texts.map(
(t) => t.length <= SHORT_THRESHOLD || t.length > LONG_THRESHOLD,
);

const needsEmbedIndices: number[] = [];
const needsEmbedTexts: string[] = [];
for (let i = 0; i < texts.length; i++) {
if (!bypassFlags[i]) {
needsEmbedIndices.push(i);
needsEmbedTexts.push(texts[i]);
}
}

// Batch embed all mid-length texts in a single API call
let vectors: number[][] = [];
if (needsEmbedTexts.length > 0) {
try {
const vec = await this.embedder.embed(text);
if (!vec || vec.length === 0 || !noiseBank.isNoise(vec)) {
result.push(text);
} else {
this.debugLog(
`memory-lancedb-pro: smart-extractor: embedding noise filtered: ${text.slice(0, 80)}`,
);
}
vectors = await this.embedder.embedBatch(needsEmbedTexts);
} catch {
// Embedding failed — pass text through
result.push(text);
// Batch failed — pass all through
return texts.slice();
}
}

const result: string[] = new Array(texts.length);
// First, fill in bypass texts (always kept)
for (let i = 0; i < texts.length; i++) {
if (bypassFlags[i]) {
result[i] = texts[i];
}
}

// Then, check noise for embedded texts
for (let j = 0; j < needsEmbedIndices.length; j++) {
const idx = needsEmbedIndices[j];
const vec = vectors[j];
if (!vec || vec.length === 0) {
result[idx] = texts[idx];
continue;
}
if (noiseBank.isNoise(vec)) {
this.debugLog(
`memory-lancedb-pro: smart-extractor: embedding noise filtered: ${texts[idx].slice(0, 80)}`,
);
// Leave result[idx] as undefined — will be compacted below
} else {
result[idx] = texts[idx];
}
}
return result;

// Compact: remove undefined slots (filtered-out entries).
// Use explicit undefined check rather than filter(Boolean) to preserve
// empty strings that were legitimately in bypass slots.
return result.filter((x): x is string => x !== undefined);
}

/**
Expand Down Expand Up @@ -513,6 +580,10 @@ export class SmartExtractor {

/**
* Process a single candidate memory: dedup → merge/create → store
*
* @param precomputedVector - Optional pre-embedded vector for the candidate.
* When provided (from batch pre-embedding), skips the per-candidate embed
* call to reduce API round-trips.
*/
private async processCandidate(
candidate: CandidateMemory,
Expand All @@ -521,6 +592,7 @@ export class SmartExtractor {
stats: ExtractionStats,
targetScope: string,
scopeFilter?: string[],
precomputedVector?: number[],
): Promise<void> {
// Profile always merges (skip dedup — admission control still applies)
if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) {
Expand All @@ -541,9 +613,9 @@ export class SmartExtractor {
return;
}

// Embed the candidate for vector dedup
const embeddingText = `${candidate.abstract} ${candidate.content}`;
const vector = await this.embedder.embed(embeddingText);
// Use pre-computed vector if available (batch embed optimization),
// otherwise fall back to per-candidate embed call.
const vector = precomputedVector ?? await this.embedder.embed(`${candidate.abstract} ${candidate.content}`);
if (!vector || vector.length === 0) {
this.log("memory-pro: smart-extractor: embedding failed, storing as-is");
await this.storeCandidate(candidate, vector || [], sessionKey, targetScope);
Expand Down
Loading