CortexReach · TriDefender · Apr 7, 2026 · Apr 7, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs
@@ -23,6 +23,7 @@ export const CI_TEST_MANIFEST = [
   { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" },
   { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" },
   { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" },
+  { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" },
   { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" },
   { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] },
   { group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] },

diff --git a/scripts/verify-ci-test-manifest.mjs b/scripts/verify-ci-test-manifest.mjs
@@ -24,6 +24,7 @@ const EXPECTED_BASELINE = [
   { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" },
   { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" },
   { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" },
+  { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" },
   { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" },
   { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] },
   { group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] },

diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts
@@ -287,10 +287,9 @@ export class SmartExtractor {
     let survivingCandidates = capped;
     try {
       const abstracts = capped.map((c) => c.abstract);
-      const vectors = await Promise.all(
-        abstracts.map((a) => this.embedder.embed(a).catch(() => [] as number[])),
-      );
-      const dedupResult = batchDedup(abstracts, vectors);
+      const vectors = await this.embedder.embedBatch(abstracts);
+      const safeVectors = vectors.map((v) => v || []);
+      const dedupResult = batchDedup(abstracts, safeVectors);
       if (dedupResult.duplicateIndices.length > 0) {
         survivingCandidates = dedupResult.survivingIndices.map((i) => capped[i]);
         stats.skipped += dedupResult.duplicateIndices.length;
@@ -304,26 +303,61 @@ export class SmartExtractor {
       );
     }
 
-    // Step 2: Process each surviving candidate through dedup pipeline
-    for (const candidate of survivingCandidates) {
+    // Step 2: Process each surviving candidate through dedup pipeline.
+    //
+    // Optimization: drop boundary-excluded (USER.md-exclusive) candidates BEFORE
+    // batch embedding so that no embed API calls are spent on candidates that
+    // will be skipped anyway (review item MR1).
+    const processableCandidates: { index: number; candidate: CandidateMemory }[] = [];
+    for (let i = 0; i < survivingCandidates.length; i++) {
+      const c = survivingCandidates[i];
       if (
         isUserMdExclusiveMemory(
           {
-            memoryCategory: candidate.category,
-            abstract: candidate.abstract,
-            content: candidate.content,
+            memoryCategory: c.category,
+            abstract: c.abstract,
+            content: c.content,
           },
           this.config.workspaceBoundary,
         )
       ) {
         stats.skipped += 1;
         stats.boundarySkipped = (stats.boundarySkipped ?? 0) + 1;
         this.log(
-          `memory-pro: smart-extractor: skipped USER.md-exclusive [${candidate.category}] ${candidate.abstract.slice(0, 60)}`,
+          `memory-pro: smart-extractor: skipped USER.md-exclusive [${c.category}] ${c.abstract.slice(0, 60)}`,
         );
         continue;
       }
+      processableCandidates.push({ index: i, candidate: c });
+    }
 
+    // Pre-compute vectors in a single batch API call for processable candidates whose
+    // category is not in ALWAYS_MERGE_CATEGORIES, cutting embedding round-trips from N to 1.
+    const precomputedVectors = new Map<number, number[]>();
+    const nonProfileToEmbed: { index: number; text: string }[] = [];
+    for (const { index, candidate } of processableCandidates) {
+      if (!ALWAYS_MERGE_CATEGORIES.has(candidate.category)) {
+        nonProfileToEmbed.push({ index, text: `${candidate.abstract} ${candidate.content}` });
+      }
+    }
+    if (nonProfileToEmbed.length > 0) {
+      try {
+        const batchTexts = nonProfileToEmbed.map((e) => e.text);
+        const batchVectors = await this.embedder.embedBatch(batchTexts);
+        for (let j = 0; j < nonProfileToEmbed.length; j++) {
+          const vec = batchVectors[j];
+          if (vec && vec.length > 0) {
+            precomputedVectors.set(nonProfileToEmbed[j].index, vec);
+          }
+        }
+      } catch (err) {
+        this.log(
+          `memory-pro: smart-extractor: batch pre-embed failed, will embed individually: ${String(err)}`,
+        );
+      }
+    }
+
+    for (const { index, candidate } of processableCandidates) {
       try {
         await this.processCandidate(
           candidate,
@@ -332,6 +366,7 @@ export class SmartExtractor {
           stats,
           targetScope,
           scopeFilter,
+          precomputedVectors.get(index),
         );
       } catch (err) {
         this.log(
@@ -351,38 +386,70 @@ export class SmartExtractor {
    * Filter out texts that match noise prototypes by embedding similarity.
    * Long texts (>300 chars) are passed through without checking.
    * Only active when noiseBank is configured and initialized.
+   *
+   * Texts of 8 chars or fewer also bypass the check; the rest are embedded in one batch call (N round-trips reduced to 1).
    */
   async filterNoiseByEmbedding(texts: string[]): Promise<string[]> {
     const noiseBank = this.config.noiseBank;
     if (!noiseBank || !noiseBank.initialized) return texts;
 
-    const result: string[] = [];
-    for (const text of texts) {
-      // Very short texts lack semantic signal — skip noise check to avoid false positives
-      if (text.length <= 8) {
-        result.push(text);
-        continue;
-      }
-      // Long texts are unlikely to be pure noise queries
-      if (text.length > 300) {
-        result.push(text);
-        continue;
+    // Partition: short/long texts bypass noise check; mid-length need embedding
+    const SHORT_THRESHOLD = 8;
+    const LONG_THRESHOLD = 300;
+    const bypassFlags: boolean[] = texts.map(
+      (t) => t.length <= SHORT_THRESHOLD || t.length > LONG_THRESHOLD,
+    );
+
+    const needsEmbedIndices: number[] = [];
+    const needsEmbedTexts: string[] = [];
+    for (let i = 0; i < texts.length; i++) {
+      if (!bypassFlags[i]) {
+        needsEmbedIndices.push(i);
+        needsEmbedTexts.push(texts[i]);
       }
+    }
+
+    // Batch embed all mid-length texts in a single API call
+    let vectors: number[][] = [];
+    if (needsEmbedTexts.length > 0) {
       try {
-        const vec = await this.embedder.embed(text);
-        if (!vec || vec.length === 0 || !noiseBank.isNoise(vec)) {
-          result.push(text);
-        } else {
-          this.debugLog(
-            `memory-lancedb-pro: smart-extractor: embedding noise filtered: ${text.slice(0, 80)}`,
-          );
-        }
+        vectors = await this.embedder.embedBatch(needsEmbedTexts);
       } catch {
-        // Embedding failed — pass text through
-        result.push(text);
+        // Batch failed — pass all through
+        return texts.slice();
+      }
+    }
+
+    const result: string[] = new Array(texts.length);
+    // First, fill in bypass texts (always kept)
+    for (let i = 0; i < texts.length; i++) {
+      if (bypassFlags[i]) {
+        result[i] = texts[i];
+      }
+    }
+
+    // Then, check noise for embedded texts
+    for (let j = 0; j < needsEmbedIndices.length; j++) {
+      const idx = needsEmbedIndices[j];
+      const vec = vectors[j];
+      if (!vec || vec.length === 0) {
+        result[idx] = texts[idx];
+        continue;
+      }
+      if (noiseBank.isNoise(vec)) {
+        this.debugLog(
+          `memory-lancedb-pro: smart-extractor: embedding noise filtered: ${texts[idx].slice(0, 80)}`,
+        );
+        // Leave result[idx] as undefined — will be compacted below
+      } else {
+        result[idx] = texts[idx];
       }
     }
-    return result;
+
+    // Compact: remove undefined slots (filtered-out entries).
+    // Use explicit undefined check rather than filter(Boolean) to preserve
+    // empty strings that were legitimately in bypass slots.
+    return result.filter((x): x is string => x !== undefined);
   }
 
   /**
@@ -513,6 +580,10 @@ export class SmartExtractor {
 
   /**
    * Process a single candidate memory: dedup → merge/create → store
+   *
+   * @param precomputedVector - Optional vector already embedded from
+   *   `${candidate.abstract} ${candidate.content}` (batch pre-embedding). When
+   *   provided, the per-candidate embed call is skipped to reduce API round-trips.
    */
   private async processCandidate(
     candidate: CandidateMemory,
@@ -521,6 +592,7 @@ export class SmartExtractor {
     stats: ExtractionStats,
     targetScope: string,
     scopeFilter?: string[],
+    precomputedVector?: number[],
   ): Promise<void> {
     // Profile always merges (skip dedup — admission control still applies)
     if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) {
@@ -541,9 +613,9 @@ export class SmartExtractor {
       return;
     }
 
-    // Embed the candidate for vector dedup
-    const embeddingText = `${candidate.abstract} ${candidate.content}`;
-    const vector = await this.embedder.embed(embeddingText);
+    // Use pre-computed vector if available (batch embed optimization),
+    // otherwise fall back to per-candidate embed call.
+    const vector = precomputedVector ?? await this.embedder.embed(`${candidate.abstract} ${candidate.content}`);
     if (!vector || vector.length === 0) {
       this.log("memory-pro: smart-extractor: embedding failed, storing as-is");
       await this.storeCandidate(candidate, vector || [], sessionKey, targetScope);