diff --git a/package-lock.json b/package-lock.json index fcbf1b04..de165655 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "memory-lancedb-pro", - "version": "1.1.0-beta.9", + "version": "1.1.0-beta.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "memory-lancedb-pro", - "version": "1.1.0-beta.9", + "version": "1.1.0-beta.10", "license": "MIT", "dependencies": { "@lancedb/lancedb": "^0.26.2", @@ -20,6 +20,13 @@ "commander": "^14.0.0", "jiti": "^2.6.0", "typescript": "^5.9.3" + }, + "optionalDependencies": { + "@lancedb/lancedb-darwin-arm64": "^0.26.2", + "@lancedb/lancedb-darwin-x64": "^0.26.2", + "@lancedb/lancedb-linux-arm64-gnu": "^0.26.2", + "@lancedb/lancedb-linux-x64-gnu": "^0.26.2", + "@lancedb/lancedb-win32-x64-msvc": "^0.26.2" } }, "node_modules/@lancedb/lancedb": { @@ -71,6 +78,9 @@ "node": ">= 18" } }, + "node_modules/@lancedb/lancedb-darwin-x64": { + "optional": true + }, "node_modules/@lancedb/lancedb-linux-arm64-gnu": { "version": "0.26.2", "resolved": "https://registry.npmjs.org/@lancedb/lancedb-linux-arm64-gnu/-/lancedb-linux-arm64-gnu-0.26.2.tgz", diff --git a/scripts/ci-test-manifest.mjs b/scripts/ci-test-manifest.mjs index 77bc1d98..48286b9f 100644 --- a/scripts/ci-test-manifest.mjs +++ b/scripts/ci-test-manifest.mjs @@ -23,6 +23,7 @@ export const CI_TEST_MANIFEST = [ { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" }, { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" }, { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" }, + { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" }, { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" }, { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] }, { group: "packaging-and-workflow", runner: "node", 
file: "test/sync-plugin-version.test.mjs", args: ["--test"] }, diff --git a/scripts/verify-ci-test-manifest.mjs b/scripts/verify-ci-test-manifest.mjs index 1a7d652a..df8090dc 100644 --- a/scripts/verify-ci-test-manifest.mjs +++ b/scripts/verify-ci-test-manifest.mjs @@ -24,6 +24,7 @@ const EXPECTED_BASELINE = [ { group: "core-regression", runner: "node", file: "test/retriever-rerank-regression.mjs" }, { group: "core-regression", runner: "node", file: "test/smart-memory-lifecycle.mjs" }, { group: "core-regression", runner: "node", file: "test/smart-extractor-branches.mjs" }, + { group: "core-regression", runner: "node", file: "test/smart-extractor-batch-embed.test.mjs" }, { group: "packaging-and-workflow", runner: "node", file: "test/plugin-manifest-regression.mjs" }, { group: "core-regression", runner: "node", file: "test/session-summary-before-reset.test.mjs", args: ["--test"] }, { group: "packaging-and-workflow", runner: "node", file: "test/sync-plugin-version.test.mjs", args: ["--test"] }, diff --git a/src/smart-extractor.ts b/src/smart-extractor.ts index dab1bb56..fcdd68f2 100644 --- a/src/smart-extractor.ts +++ b/src/smart-extractor.ts @@ -287,10 +287,9 @@ export class SmartExtractor { let survivingCandidates = capped; try { const abstracts = capped.map((c) => c.abstract); - const vectors = await Promise.all( - abstracts.map((a) => this.embedder.embed(a).catch(() => [] as number[])), - ); - const dedupResult = batchDedup(abstracts, vectors); + const vectors = await this.embedder.embedBatch(abstracts); + const safeVectors = vectors.map((v) => v || []); + const dedupResult = batchDedup(abstracts, safeVectors); if (dedupResult.duplicateIndices.length > 0) { survivingCandidates = dedupResult.survivingIndices.map((i) => capped[i]); stats.skipped += dedupResult.duplicateIndices.length; @@ -304,14 +303,20 @@ export class SmartExtractor { ); } - // Step 2: Process each surviving candidate through dedup pipeline - for (const candidate of survivingCandidates) { + // Step 
2: Process each surviving candidate through dedup pipeline. + // + // Optimization: filter boundary-excluded candidates BEFORE batch embedding + // to avoid wasting embed API calls on candidates that will be skipped. + // See MR1 from code review. + const processableCandidates: { index: number; candidate: CandidateMemory }[] = []; + for (let i = 0; i < survivingCandidates.length; i++) { + const c = survivingCandidates[i]; if ( isUserMdExclusiveMemory( { - memoryCategory: candidate.category, - abstract: candidate.abstract, - content: candidate.content, + memoryCategory: c.category, + abstract: c.abstract, + content: c.content, }, this.config.workspaceBoundary, ) @@ -319,11 +324,40 @@ export class SmartExtractor { stats.skipped += 1; stats.boundarySkipped = (stats.boundarySkipped ?? 0) + 1; this.log( - `memory-pro: smart-extractor: skipped USER.md-exclusive [${candidate.category}] ${candidate.abstract.slice(0, 60)}`, + `memory-pro: smart-extractor: skipped USER.md-exclusive [${c.category}] ${c.abstract.slice(0, 60)}`, ); continue; } + processableCandidates.push({ index: i, candidate: c }); + } + // Pre-compute vectors for processable non-profile candidates in a single batch API call + // to reduce embedding round-trips from N to 1. 
+ const precomputedVectors = new Map(); + const nonProfileToEmbed: { index: number; text: string }[] = []; + for (const { index, candidate } of processableCandidates) { + if (!ALWAYS_MERGE_CATEGORIES.has(candidate.category)) { + nonProfileToEmbed.push({ index, text: `${candidate.abstract} ${candidate.content}` }); + } + } + if (nonProfileToEmbed.length > 0) { + try { + const batchTexts = nonProfileToEmbed.map((e) => e.text); + const batchVectors = await this.embedder.embedBatch(batchTexts); + for (let j = 0; j < nonProfileToEmbed.length; j++) { + const vec = batchVectors[j]; + if (vec && vec.length > 0) { + precomputedVectors.set(nonProfileToEmbed[j].index, vec); + } + } + } catch (err) { + this.log( + `memory-pro: smart-extractor: batch pre-embed failed, will embed individually: ${String(err)}`, + ); + } + } + + for (const { index, candidate } of processableCandidates) { try { await this.processCandidate( candidate, @@ -332,6 +366,7 @@ export class SmartExtractor { stats, targetScope, scopeFilter, + precomputedVectors.get(index), ); } catch (err) { this.log( @@ -351,38 +386,70 @@ export class SmartExtractor { * Filter out texts that match noise prototypes by embedding similarity. * Long texts (>300 chars) are passed through without checking. * Only active when noiseBank is configured and initialized. + * + * Uses batch embedding to reduce API round-trips from N to 1. 
*/ async filterNoiseByEmbedding(texts: string[]): Promise<string[]> { const noiseBank = this.config.noiseBank; if (!noiseBank || !noiseBank.initialized) return texts; - const result: string[] = []; - for (const text of texts) { - // Very short texts lack semantic signal — skip noise check to avoid false positives - if (text.length <= 8) { - result.push(text); - continue; - } - // Long texts are unlikely to be pure noise queries - if (text.length > 300) { - result.push(text); - continue; + // Partition: short/long texts bypass noise check; mid-length need embedding + const SHORT_THRESHOLD = 8; + const LONG_THRESHOLD = 300; + const bypassFlags: boolean[] = texts.map( + (t) => t.length <= SHORT_THRESHOLD || t.length > LONG_THRESHOLD, + ); + + const needsEmbedIndices: number[] = []; + const needsEmbedTexts: string[] = []; + for (let i = 0; i < texts.length; i++) { + if (!bypassFlags[i]) { + needsEmbedIndices.push(i); + needsEmbedTexts.push(texts[i]); } + } + + // Batch embed all mid-length texts in a single API call + let vectors: number[][] = []; + if (needsEmbedTexts.length > 0) { try { - const vec = await this.embedder.embed(text); - if (!vec || vec.length === 0 || !noiseBank.isNoise(vec)) { - result.push(text); - } else { - this.debugLog( - `memory-lancedb-pro: smart-extractor: embedding noise filtered: ${text.slice(0, 80)}`, - ); - } + vectors = await this.embedder.embedBatch(needsEmbedTexts); } catch { - // Embedding failed — pass text through - result.push(text); + // Batch failed — pass all through + return texts.slice(); + } + } + + const result: string[] = new Array(texts.length); + // First, fill in bypass texts (always kept) + for (let i = 0; i < texts.length; i++) { + if (bypassFlags[i]) { + result[i] = texts[i]; + } + } + + // Then, check noise for embedded texts + for (let j = 0; j < needsEmbedIndices.length; j++) { + const idx = needsEmbedIndices[j]; + const vec = vectors[j]; + if (!vec || vec.length === 0) { + result[idx] = texts[idx]; + continue; + } + if 
(noiseBank.isNoise(vec)) { + this.debugLog( + `memory-lancedb-pro: smart-extractor: embedding noise filtered: ${texts[idx].slice(0, 80)}`, + ); + // Leave result[idx] as undefined — will be compacted below + } else { + result[idx] = texts[idx]; } } - return result; + + // Compact: remove undefined slots (filtered-out entries). + // Use explicit undefined check rather than filter(Boolean) to preserve + // empty strings that were legitimately in bypass slots. + return result.filter((x): x is string => x !== undefined); } /** @@ -513,6 +580,10 @@ export class SmartExtractor { /** * Process a single candidate memory: dedup → merge/create → store + * + * @param precomputedVector - Optional pre-embedded vector for the candidate. + * When provided (from batch pre-embedding), skips the per-candidate embed + * call to reduce API round-trips. */ private async processCandidate( candidate: CandidateMemory, @@ -521,6 +592,7 @@ stats: ExtractionStats, targetScope: string, scopeFilter?: string[], + precomputedVector?: number[], ): Promise<void> { // Profile always merges (skip dedup — admission control still applies) if (ALWAYS_MERGE_CATEGORIES.has(candidate.category)) { @@ -541,9 +613,9 @@ return; } - // Embed the candidate for vector dedup - const embeddingText = `${candidate.abstract} ${candidate.content}`; - const vector = await this.embedder.embed(embeddingText); + // Use pre-computed vector if available (batch embed optimization), + // otherwise fall back to per-candidate embed call. + const vector = precomputedVector ?? 
await this.embedder.embed(`${candidate.abstract} ${candidate.content}`); if (!vector || vector.length === 0) { this.log("memory-pro: smart-extractor: embedding failed, storing as-is"); await this.storeCandidate(candidate, vector || [], sessionKey, targetScope); diff --git a/test/smart-extractor-batch-embed.test.mjs b/test/smart-extractor-batch-embed.test.mjs new file mode 100644 index 00000000..5fb9966a --- /dev/null +++ b/test/smart-extractor-batch-embed.test.mjs @@ -0,0 +1,398 @@ +/** + * Explicit tests for batch embedding paths in SmartExtractor. + * + * Verifies that the three refactored sites use embedBatch + * instead of serial per-element embed() calls, and that graceful + * fallback works when batch fails. + * + * NOTE: SmartExtractor uses INTERNAL categories (profile/preferences/entities/ + * events/cases/patterns), NOT store categories (preference/fact/decision/entity/ + * other). See src/memory-categories.ts for the canonical list. + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import jitiFactory from "jiti"; + +const jiti = jitiFactory(import.meta.url, { interopDefault: true }); +const { SmartExtractor } = jiti("../src/smart-extractor.ts"); + +// ============================================================================ +// Helpers +// ============================================================================ + +/** Create a mock embedder with call counters for each method. */ +function makeCountingEmbedder(options = {}) { + const { + /** If set, embedBatch will throw (simulates batch failure). */ + batchShouldFail = false, + /** If set, embed will throw (simulates single embed failure). 
*/ + embedShouldFail = false, + } = options; + + const calls = { embed: 0, embedBatch: 0 }; + + const embedder = { + async embed(text) { + calls.embed++; + if (embedShouldFail) throw new Error("mock embed failure"); + // Deterministic vector based on text length for dedup stability + return Array(256).fill(0).map((_, i) => (text.length > 0 ? (text.charCodeAt(i % text.length) / 255) : 0)); + }, + async embedBatch(texts) { + calls.embedBatch++; + if (batchShouldFail) throw new Error("mock batch failure"); + // Return vectors directly WITHOUT calling this.embed() to keep counters independent + return (texts || []).map((t) => + Array(256).fill(0).map((_, i) => (t.length > 0 ? (t.charCodeAt(i % t.length) / 255) : 0)), + ); + }, + get calls() { + return { ...calls }; + }, + }; + + return { embedder, calls }; +} + +/** Create a minimal LLM client that returns configurable candidates. + * Categories must use SmartExtractor INTERNAL names: + * profile | preferences | entities | events | cases | patterns + */ +function makeLlm(candidates) { + return { + async completeJson(_prompt, mode) { + if (mode === "extract-candidates") { + return { memories: candidates }; + } + if (mode === "dedup-decision") { + return { decision: "create", reason: "no match" }; + } + if (mode === "merge-memory") { + return candidates[0] ?? null; + } + return null; + }, + }; +} + +/** Create a minimal store that records all writes. 
*/ +function makeStore() { + const entries = []; + const store = { + async vectorSearch(_vector, _limit, _minScore, _scopeFilter) { + return []; + }, + async store(entry) { + entries.push({ action: "store", entry }); + return entry; + }, + async update(_id, _patch, _scopeFilter) { + entries.push({ action: "update", id: _id }); + }, + async getById(_id, _scopeFilter) { + return null; + }, + get entries() { + return [...entries]; + }, + }; + return store; +} + +function makeExtractor(embedder, llm, store, config = {}) { + return new SmartExtractor(store, embedder, llm, { + user: "User", + extractMinMessages: 1, + extractMaxChars: 8000, + defaultScope: "global", + log() {}, + debugLog() {}, + ...config, + }); +} + +// ============================================================================ +// Tests +// ============================================================================ + +describe("SmartExtractor batch embedding paths", () => { + + // -------------------------------------------------------------------------- + // Test 1: Step 1b batchDedup uses embedBatch (not N×embed) + // -------------------------------------------------------------------------- + it("uses embedBatch for batch-internal dedup of candidate abstracts", async () => { + const { embedder, calls } = makeCountingEmbedder(); + const llm = makeLlm([ + { + category: "cases", + abstract: "用户居住在上海市浦东新区张江高科技园区", + overview: "地址信息", + content: "用户的居住地是上海市浦东新区张江高科技园区附近。", + }, + { + category: "cases", + abstract: "用户非常喜欢使用Python进行数据分析工作", + overview: "职业兴趣", + content: "用户对编程很感兴趣,特别是Python数据分析方向。", + }, + ]); + const store = makeStore(); + const extractor = makeExtractor(embedder, llm, store); + + await extractor.extractAndPersist("用户说:我住上海,喜欢编程。", "s1"); + + // Should have called embedBatch once for the abstracts (Step 1b) + assert.ok( + calls.embedBatch >= 1, + `Expected at least 1 embedBatch call for Step 1b dedup, got ${calls.embedBatch}`, + ); + }); + + // 
-------------------------------------------------------------------------- + // Test 2: filterNoiseByEmbedding uses embedBatch (direct call) + // -------------------------------------------------------------------------- + it("uses embedBatch in filterNoiseByEmbedding when noise bank is active", async () => { + const { embedder, calls } = makeCountingEmbedder(); + const llm = makeLlm([]); // not used by filterNoiseByEmbedding + const store = makeStore(); // not used by filterNoiseByEmbedding + + const noiseBank = { + initialized: true, + isNoise(_vec) { return false; }, + learn(_vec) {}, + }; + + const extractor = makeExtractor(embedder, llm, store, { noiseBank }); + + // Call filterNoiseByEmbedding DIRECTLY — this is the method under test. + // Mix of lengths: short (bypass), mid-length (needs embedding), long (bypass). + const inputTexts = [ + "短", // ≤8 → bypass + "这是一条中等长度的测试文本用于验证批量嵌入功能", // 9-300 → needs embed + "这是一条另一条中等长度文本内容", // 9-300 → needs embed + "x".repeat(350), // >300 → bypass + ]; + + const result = await extractor.filterNoiseByEmbedding(inputTexts); + + // All texts should pass through (isNoise returns false for everything) + assert.strictEqual(result.length, 4, + `Expected all 4 texts to pass through, got ${result.length}`); + + // embedBatch should have been called exactly once for the 2 mid-length texts + assert.strictEqual(calls.embedBatch, 1, + `Expected 1 embedBatch call for filterNoiseByEmbedding, got ${calls.embedBatch}`); + + // embed() should NOT have been called (batch path used instead) + assert.strictEqual(calls.embed, 0, + `Expected 0 embed calls (batch path), got ${calls.embed}`); + }); + + // -------------------------------------------------------------------------- + // Test 3: Batch pre-compute for non-profile candidates uses embedBatch + // -------------------------------------------------------------------------- + it("pre-computes vectors via embedBatch before processing candidates", async () => { + const { embedder, calls } 
= makeCountingEmbedder(); + const llm = makeLlm([ + { + category: "preferences", + abstract: "用户偏好使用深色主题来减少眼睛疲劳", + overview: "", + content: "用户明确表示偏好深色主题界面设置", + }, + { + category: "entities", + abstract: "张三是用户经常提到的同事名字", + overview: "", + content: "张三在用户的对话中多次被提及为同事关系", + }, + { + category: "events", + abstract: "上周参加了公司年度技术分享会议", + overview: "", + content: "用户参与了公司的年度技术分享活动", + }, + ]); + const store = makeStore(); + const extractor = makeExtractor(embedder, llm, store); + + await extractor.extractAndPersist("多候选对话内容用于测试预计算", "s1"); + + // At least one embedBatch call for pre-computing non-profile candidate vectors + assert.ok( + calls.embedBatch >= 1, + `Expected embedBatch for candidate pre-computation, got ${calls.embedBatch}`, + ); + }); + + // -------------------------------------------------------------------------- + // Test 4: Batch failure falls back gracefully (no crash) + // -------------------------------------------------------------------------- + it("falls back to individual embed when batch pre-computation fails", async () => { + const { embedder, calls } = makeCountingEmbedder({ + batchShouldFail: true, + }); + const llm = makeLlm([ + { + category: "cases", + abstract: "回退路径测试用例验证降级逻辑正确性", + overview: "", + content: "当batch失败时应该回退到单条embed调用方式", + }, + ]); + const store = makeStore(); + const extractor = makeExtractor(embedder, llm, store); + + // Should NOT throw — batch failure is caught and logged + const stats = await extractor.extractAndPersist("回退测试对话内容", "s1"); + + // Extraction should still succeed (fallback path) + assert.ok(stats.created >= 0 || stats.merged >= 0 || stats.skipped >= 0, + `Extraction should produce stats, got ${JSON.stringify(stats)}`); + + // Individual embed calls should have been made as fallback + assert.ok( + calls.embed >= 1, + `Expected fallback embed calls after batch failure, got embed=${calls.embed}, embedBatch=${calls.embedBatch}`, + ); + }); + + // 
-------------------------------------------------------------------------- + // Test 5: filterNoiseByEmbedding batch failure passes all texts through (direct call) + // -------------------------------------------------------------------------- + it("passes all texts through when filterNoiseByEmbedding batch fails", async () => { + const { embedder } = makeCountingEmbedder({ + batchShouldFail: true, + }); + const llm = makeLlm([]); // not used + const store = makeStore(); // not used + + const noiseBank = { + initialized: true, + isNoise(_vec) { return false; }, + learn(_vec) {}, + }; + + const extractor = makeExtractor(embedder, llm, store, { noiseBank }); + + // Call filterNoiseByEmbedding DIRECTLY with mid-length texts that would + // normally be sent to embedBatch. + const inputTexts = [ + "噪声过滤回退测试用例文本内容第一段", + "噪声过滤回退测试用例文本内容第二段", + "噪声过滤回退测试用例文本内容第三段", + ]; + + // Should NOT throw — batch failure returns all texts unfiltered + const result = await extractor.filterNoiseByEmbedding(inputTexts); + + assert.strictEqual(result.length, inputTexts.length, + `Expected all ${inputTexts.length} texts to pass through on batch failure, got ${result.length}`); + }); + + // -------------------------------------------------------------------------- + // Test 6: Bypass texts (short/long) are not sent to embedBatch in noise filter (direct call) + // -------------------------------------------------------------------------- + it("does not send bypass texts (short/long) to embedBatch in noise filter", async () => { + let lastBatchInput = null; + const embedder = { + async embed() { return [0.1]; }, + async embedBatch(texts) { + lastBatchInput = texts; + return texts.map(() => [0.1]); + }, + }; + const llm = makeLlm([]); // not used + const store = makeStore(); // not used + + const noiseBank = { + initialized: true, + isNoise(_vec) { return false; }, + learn(_vec) {}, + }; + + const extractor = makeExtractor(embedder, llm, store, { noiseBank }); + + // Call 
filterNoiseByEmbedding DIRECTLY with a mix of lengths + const inputTexts = [ + "短", // ≤8 → bypass + "正常长度文本用于噪声过滤测试验证逻辑正确性", // 9-300 → needs embed + "x".repeat(5), // ≤8 → bypass + "另一条正常长度文本内容用于测试", // 9-300 → needs embed + "x".repeat(350), // >300 → bypass + ]; + + await extractor.filterNoiseByEmbedding(inputTexts); + + // embedBatch should have been called with ONLY mid-length texts + assert.ok(lastBatchInput !== null, + "Expected embedBatch to be called for mid-length texts"); + + for (const t of lastBatchInput) { + assert.ok( + t.length > 8 && t.length <= 300, + `Text sent to embedBatch should be in (8, 300] range, got length=${t.length}: "${t.slice(0, 40)}"`, + ); + } + + // Verify the specific texts that should have been batched + const batchedTexts = lastBatchInput.map((t) => t); + assert.ok( + batchedTexts.some((t) => t.includes("正常长度文本")), + "Expected mid-length text '正常长度文本...' in batch input", + ); + assert.ok( + batchedTexts.some((t) => t.includes("另一条正常长度")), + "Expected mid-length text '另一条正常长度...' 
in batch input", + ); + }); + + // -------------------------------------------------------------------------- + // Test 7: Profile candidates are excluded from batch pre-computation + // -------------------------------------------------------------------------- + it("excludes profile-category candidates from batch pre-computation (Step 2)", async () => { + // Track all embedBatch calls to distinguish Step 1b (dedup) from Step 2 (pre-compute) + const allBatchCalls = []; + const embedder = { + async embed() { return Array(256).fill(0.1); }, + async embedBatch(texts) { + allBatchCalls.push([...texts]); + return texts.map(() => Array(256).fill(0.1)); + }, + }; + const llm = makeLlm([ + { + category: "profile", + abstract: "用户基本画像信息包括职业和地理位置偏好", + overview: "", + content: "这是用户的基本画像信息汇总数据。", + }, + ]); + const store = makeStore(); + const extractor = makeExtractor(embedder, llm, store); + + await extractor.extractAndPersist("画像提取测试对话内容", "s1"); + + // There should be at least 1 embedBatch call: + // Call 1: Step 1b batchDedup (abstracts) — may include profile + // Any Step 2 pre-computation call must NOT include profile + assert.ok(allBatchCalls.length >= 1, + `Expected at least 1 embedBatch call, got ${allBatchCalls.length}`); + + // Any embedBatch call after Step 1b would be Step 2 pre-computation. + // Check that none of them contain profile candidate text. + const profileTexts = allBatchCalls.filter((call) => + call.some((t) => t.includes("用户基本画像") || t.includes("画像信息")), + ); + + // Step 1b dedup MAY include profile abstract (that's expected). + // But Step 2 pre-compute MUST exclude it. + // With a single profile candidate, we expect at most 1 call that includes + // profile text (the Step 1b dedup call). If there are more, that's a bug. + assert.ok( + profileTexts.length <= 1, + `Only Step 1b dedup may include profile text, but got ${profileTexts.length} calls with profile text`, + ); + }); +});