refactor(web): begin implementation of quotient-path convergence via SearchQuotientCluster 🚂 #14949
base: epic/autocorrect
Changes from all commits: f8aa3d4, 6048b3d, d3857a1, cae5c2f
@@ -0,0 +1,185 @@

```ts
/*
 * Keyman is copyright (C) SIL Global. MIT License.
 *
 * Created by jahorton on 2025-10-20
 *
 * This file defines the predictive-text engine's SearchSpace class, which is used to
 * manage the search-space(s) for text corrections within the engine.
 */

import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils';
```

> **Contributor:** Can we please directly use
> *Suggested change*
```ts
import { LexicalModelTypes } from '@keymanapp/common-types';

import { SearchNode, SearchResult } from './distance-modeler.js';
import { generateSpaceSeed, InputSegment, PathResult, SearchQuotientNode } from './search-quotient-node.js';

const PATH_QUEUE_COMPARATOR: Comparator<SearchQuotientNode> = (a, b) => {
  return a.currentCost - b.currentCost;
}
```

> **Contributor:**
> *Suggested change*
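For orientation, the comparator orders entries ascending by `currentCost`, so the `PriorityQueue` behaves as a min-heap: `peek()` and `dequeue()` always surface the cheapest remaining path. A minimal sketch of that behavior, assuming only the `PriorityQueue` surface already exercised in this file (`enqueueAll`, `dequeue`, `peek`):

```ts
import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils';

// Hypothetical stand-in type; only the field the comparator reads is needed.
interface Costed { currentCost: number; }

const byCost: Comparator<Costed> = (a, b) => a.currentCost - b.currentCost;

const queue = new PriorityQueue<Costed>(byCost);
queue.enqueueAll([{ currentCost: 3.2 }, { currentCost: 1.5 }, { currentCost: 2.0 }]);

queue.dequeue(); // { currentCost: 1.5 } - the lowest-cost entry comes out first
queue.peek();    // { currentCost: 2.0 } - the next-cheapest waits at the head
```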
```ts
// The set of search spaces corresponding to the same 'context' for search.
// Whenever a wordbreak boundary is crossed, a new instance should be made.
export class SearchQuotientCluster implements SearchQuotientNode {
  // While most functions can be done directly from SearchSpace, merging and splitting will need access
  // to SearchPath-specific members. It's also cleaner to not allow nested SearchClusters while we
  // haven't worked out support for such a scenario.
  private selectionQueue: PriorityQueue<SearchQuotientNode> = new PriorityQueue(PATH_QUEUE_COMPARATOR);
  readonly spaceId: number;

  // We use an array and not a PriorityQueue b/c batch-heapifying at a single point in time
  // is cheaper than iteratively building a priority queue.
  /**
   * This tracks all paths that have reached the end of a viable input-matching path - even
   * those of lower cost that produce the same correction as other paths.
   *
   * When new input is received, its entries are then used to append edges to the path in order
   * to find potential paths to reach a new viable end.
   */
  private completedPaths?: SearchNode[] = [];

  /**
   * Acts as a Map that prevents duplicating a correction-search path if reached
   * more than once.
   */
  protected get processedEdgeSet(): {[pathKey: string]: boolean} {
    return this._processedEdgeSet;
  }

  private _processedEdgeSet?: {[pathKey: string]: boolean} = {};

  /**
   * Provides a heuristic for the base cost at each depth if the best
   * individual input were taken at that level.
   */
  readonly lowestPossibleSingleCost: number;

  /**
   * Constructs a fresh SearchSpace instance for used in predictive-text correction
   * and suggestion searches.
   * @param baseSpaceId
   * @param model
   */
```

> **Contributor:**
> *Suggested change*

> **Contributor** (comment on lines +59 to +60):
> *Suggested change*
```ts
  constructor(inboundPaths: SearchQuotientNode[]) {
    if(inboundPaths.length == 0) {
      throw new Error("SearchCluster requires an array with at least one SearchPath");
    }

    let lowestPossibleSingleCost = Number.POSITIVE_INFINITY;
    const firstPath = inboundPaths[0];
    const inputCount = firstPath.inputCount;
    const codepointLength = firstPath.codepointLength;
    const sourceRangeKey = firstPath.sourceRangeKey;

    for(let path of inboundPaths) {
      if(path.inputCount != inputCount || path.codepointLength != codepointLength) {
        throw new Error(`SearchPath does not share same properties as others in the cluster: inputCount ${path.inputCount} vs ${inputCount}, codepointLength ${path.codepointLength} vs ${codepointLength}`);
      }

      // If there's a source-range key mismatch - via mismatch in count or in actual ID, we have an error.
      if(path.sourceRangeKey != sourceRangeKey) {
        throw new Error(`SearchPath does not share the same source identifiers as others in the cluster`);
      }

      lowestPossibleSingleCost = Math.min(lowestPossibleSingleCost, path.lowestPossibleSingleCost);
    }

    this.spaceId = generateSpaceSeed();

    this.lowestPossibleSingleCost = lowestPossibleSingleCost;
    this.completedPaths = inboundPaths.flatMap(p => p.previousResults).map(r => r.node);
    this.selectionQueue.enqueueAll(inboundPaths);

    return;
  }
```
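The constructor's validation enforces the "quotient" invariant: every inbound path must cover the same slice of input (same `inputCount`, `codepointLength`, and `sourceRangeKey`); member paths may differ only in the corrections they represent. A distilled sketch of that check, using a hypothetical `PathLike` shape that carries just the fields the invariant reads:

```ts
// Hypothetical slice of SearchQuotientNode; only the invariant's fields.
interface PathLike {
  inputCount: number;
  codepointLength: number;
  sourceRangeKey: string;
}

// All paths merged into one cluster must describe the same slice of input.
// (The real constructor additionally rejects empty arrays.)
function isClusterCompatible(paths: PathLike[]): boolean {
  const [first, ...rest] = paths;
  return rest.every(p =>
    p.inputCount == first.inputCount &&
    p.codepointLength == first.codepointLength &&
    p.sourceRangeKey == first.sourceRangeKey
  );
}
```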
```ts
  public get inputCount(): number {
    return this.selectionQueue.peek()?.inputCount ?? 0;
  }

  public get bestExample(): {text: string, p: number} {
    const bestPrefixes = this.selectionQueue.toArray().map(p => p.bestExample);
    return bestPrefixes.reduce((max, curr) => max.p < curr.p ? curr : max);
  }

  public get parents(): SearchQuotientNode[] {
    return this.selectionQueue.toArray().slice();
  }

  increaseMaxEditDistance() {
    // By extracting the entries from the priority queue and increasing distance outside of it as a batch job,
    // we get an O(N) implementation, rather than the O(N log N) that would result from maintaining the original queue.
    const entries = this.selectionQueue.toArray();

    entries.forEach((path) => path.increaseMaxEditDistance());

    // Since we just modified the stored instances, and the costs may have shifted, we need to re-heapify.
    this.selectionQueue = new PriorityQueue<SearchQuotientNode>(PATH_QUEUE_COMPARATOR, entries.slice());
  }
```
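The rebuild above leans on a classic result: bottom-up heapification of N items costs O(N), while N individual inserts into a growing heap cost O(N log N). A sketch contrasting the two approaches, assuming (as the code comment above states) that the two-argument `PriorityQueue` constructor batch-heapifies its initial entries:

```ts
declare const entries: SearchQuotientNode[];

// O(N log N): each enqueue sifts one element through an ever-growing heap.
const rebuiltIncrementally = new PriorityQueue<SearchQuotientNode>(PATH_QUEUE_COMPARATOR);
for(const entry of entries) {
  rebuiltIncrementally.enqueue(entry);
}

// O(N): hand the whole batch to the constructor and heapify once.
const rebuiltAsBatch = new PriorityQueue<SearchQuotientNode>(PATH_QUEUE_COMPARATOR, entries.slice());
```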
```ts
  /**
   * When true, this indicates that the currently-represented portion of context
   * has fat-finger data available, which itself indicates that the user has
   * corrections enabled.
   */
  get correctionsEnabled(): boolean {
    const paths = this.selectionQueue.toArray();
    // When corrections are disabled, the Web engine will only provide individual Transforms
    // for an input, not a distribution. No distributions means we shouldn't do corrections.
    return !!paths.find(p => p.correctionsEnabled);
  }
```

> **Contributor:**
> *Suggested change*
```ts
  public get currentCost(): number {
    return this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY;
  }

  /**
   * Retrieves the lowest-cost / lowest-distance edge from the selection queue,
   * checks its validity as a correction to the input text, and reports on what
   * sort of result the edge's destination node represents.
   * @returns
   */
  public handleNextNode(): PathResult {
    const bestPath = this.selectionQueue.dequeue();
    const currentResult = bestPath.handleNextNode();
    this.selectionQueue.enqueue(bestPath);

    if(currentResult.type == 'complete') {
      this.completedPaths?.push(currentResult.finalNode);
      currentResult.spaceId = this.spaceId;
    }

    return currentResult;
  }

  public get previousResults(): SearchResult[] {
    return this.completedPaths?.map((n => new SearchResult(n, this.spaceId))) ?? [];
  }
```
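`handleNextNode` dequeues the cheapest member path, advances it one step, and re-enqueues it, so the cluster as a whole advances in global best-first order across all of its member paths. A hypothetical driver loop built on that contract; the function name, result cap, and iteration guard below are illustrative, not part of this PR:

```ts
// Hypothetical: drain up to `maxResults` completed corrections from a cluster.
function drainBestResults(cluster: SearchQuotientCluster, maxResults: number, maxSteps = 1000): PathResult[] {
  const completions: PathResult[] = [];
  for(let step = 0; step < maxSteps && completions.length < maxResults; step++) {
    const result = cluster.handleNextNode();
    // 'complete' marks a path that has matched all input and yields a viable correction.
    if(result.type == 'complete') {
      completions.push(result);
    }
  }
  return completions;
}
```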
```ts
  get model(): LexicalModelTypes.LexicalModel {
    return this.parents[0].model;
  }

  get codepointLength(): number {
    return this.parents[0].codepointLength;
  }

  get inputSegments(): InputSegment[] {
    return this.parents[0].inputSegments;
  };

  /**
   * Gets a compact string-based representation of `inputRange` that
   * maps compatible token source ranges to each other.
   */
  get sourceRangeKey(): string {
    return this.parents[0].sourceRangeKey;
  }

  merge(space: SearchQuotientNode): SearchQuotientNode {
    throw new Error('Method not implemented.');
  }

  split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] {
    throw new Error('Method not implemented.');
  }
}
```
```diff
@@ -256,12 +256,8 @@ describe('ContextState', () => {
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1);
       // empty transform
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1);
-
-      // if(!newContextMatch.final.tokenization.alignment.canAlign) {
-      //   assert.fail("context alignment failed");
-      // }
-      // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0);
-      // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 2);
+      assert.isTrue(state.tokenization.tail.searchModule instanceof SearchQuotientSpur);
+      assert.deepEqual((state.tokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]);
     });

     it("properly matches and aligns when whitespace before final empty token is extended", function() {
```
```diff
@@ -286,8 +282,10 @@ describe('ContextState', () => {
       // Two whitespaces, one of which is new!
       const preTail = state.tokenization.tokens[state.tokenization.tokens.length - 2];
       assert.equal(preTail.searchModule.inputCount, 2);
-      assert.deepEqual((preTail.searchModule as SearchQuotientSpur).lastInput, [{sample: transform, p: 1}]);
+      assert.deepEqual((preTail.searchModule.parents[0] as SearchQuotientSpur).lastInput, [{sample: transform, p: 1}]);
       assert.equal(state.tokenization.tail.searchModule.inputCount, 1);
+      assert.isTrue(state.tokenization.tail.searchModule instanceof SearchQuotientSpur);
+      assert.deepEqual((state.tokenization.tail.searchModule as SearchQuotientSpur).lastInput, [{sample: { insert: '', deleteLeft: 0 }, p: 1}]);
     });

     it("properly matches and aligns when a 'wordbreak' is removed via backspace", function() {
```
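The switch to `preTail.searchModule.parents[0]` in the hunk above reflects the new layer introduced by this PR: the token's search module is now expected to be a quotient node whose member paths are exposed through the `parents` getter defined in the new file. A sketch of the access pattern, with hypothetical declarations standing in for the test's fixtures:

```ts
declare const preTail: { searchModule: SearchQuotientCluster };

// The spur that directly recorded the most recent input is one of the
// cluster's member paths, reachable via the `parents` getter.
const spur = preTail.searchModule.parents[0] as SearchQuotientSpur;
spur.lastInput; // e.g. [{ sample: transform, p: 1 }]
```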
```diff
@@ -304,12 +302,6 @@ describe('ContextState', () => {
       let newContextMatch = baseState.analyzeTransition(existingContext, toWrapperDistribution(transform));
       assert.isOk(newContextMatch?.final);
       assert.deepEqual(newContextMatch?.final.tokenization.tokens.map(token => token.exampleInput), rawTokens);
-
-      // if(!newContextMatch.final.tokenization.alignment.canAlign) {
-      //   assert.fail("context alignment failed");
-      // }
-      // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0);
-      // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, -2);
     });

     it("properly matches and aligns when an implied 'wordbreak' occurs (as when following \"'\")", function() {
```
```diff
@@ -332,12 +324,6 @@ describe('ContextState', () => {
       let state = newContextMatch.final;
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1);
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1);
-
-      // if(!newContextMatch.final.tokenization.alignment.canAlign) {
-      //   assert.fail("context alignment failed");
-      // }
-      // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0);
-      // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 1);
     })

     // Needs improved context-state management (due to 2x tokens)
```
```diff
@@ -398,12 +384,6 @@ describe('ContextState', () => {
       assert.equal(
         state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1
       );
-
-      // if(!newContextMatch.final.tokenization.alignment.canAlign) {
-      //   assert.fail("context alignment failed");
-      // }
-      // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0);
-      // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 2);
     });

     it("properly matches and aligns when tail token is modified AND a 'wordbreak' is added'", function() {
```
```diff
@@ -427,15 +407,9 @@ describe('ContextState', () => {
       let state = newContextMatch.final;
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 2].searchModule.inputCount, 1);
       assert.equal(state.tokenization.tokens[state.tokenization.tokens.length - 1].searchModule.inputCount, 1);
-
-      // if(!newContextMatch.final.tokenization.alignment.canAlign) {
-      //   assert.fail("context alignment failed");
-      // }
-      // assert.equal(newContextMatch.final.tokenization.alignment.leadTokenShift, 0);
-      // assert.equal(newContextMatch.final.tokenization.alignment.tailTokenShift, 2);
     });

-    it('handles case where tail token is split into three rather than two', function() {
+    it.skip('handles case where tail token is split into three rather than two', function() {
       let baseContext = models.tokenize(defaultBreaker, {
         left: "text'", startOfBuffer: true, endOfBuffer: true
       });
```

> **Contributor:** I guess the skipped tests (in this and the other files) will be re-enabled in one of the following PRs?
```diff
@@ -561,7 +561,7 @@ describe('ContextTokenization', function() {
     }
   });

-  it('handles case that triggers a token merge: can+\'+t', () => {
+  it.skip('handles case that triggers a token merge: can+\'+t', () => {
     const baseTokens = ['an', ' ', 'apple', ' ', 'a', ' ', 'day', ' ', 'can', '\''];
     const baseTokenization = new ContextTokenization(baseTokens.map(t => toToken(t)));
```

> **Contributor (Author):** TODO: double-check - is this still valid? I thought I reworked things to be supported at this point; this change may be a holdover from a previous implementation strategy.
```diff
@@ -625,7 +625,7 @@ describe('ContextTokenization', function() {
     });
   });

-  it('handles case that triggers a token split: can\' +. => can, \', .', () => {
+  it.skip('handles case that triggers a token split: can\' +. => can, \', .', () => {
     const baseTokens = ['an', ' ', 'apple', ' ', 'a', ' ', 'day', ' ', 'can\''];
     const baseTokenization = new ContextTokenization(baseTokens.map(t => toToken(t)));
```

> Shouldn't this be ?