@@ -5,6 +5,7 @@ import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbre
55
66import TransformUtils from './transformUtils.js' ;
77import { determineModelTokenizer , determineModelWordbreaker , determinePunctuationFromModel } from './model-helpers.js' ;
8+ import { ContextTokenization } from './correction/context-tokenization.js' ;
89import { ContextTracker } from './correction/context-tracker.js' ;
910import { ContextState , determineContextSlideTransform } from './correction/context-state.js' ;
1011import { ExecutionTimer } from './correction/execution-timer.js' ;
@@ -326,6 +327,7 @@ export function determineContextTransition(
326327 */
327328export function determineSuggestionAlignment (
328329 transition : ContextTransition ,
330+ tokenization : ContextTokenization ,
329331 lexicalModel : LexicalModel
330332) : {
331333 /**
@@ -338,7 +340,7 @@ export function determineSuggestionAlignment(
338340 */
339341 deleteLeft : number
340342} {
341- const transitionEdits = transition . final . tokenization . transitionEdits ;
343+ const transitionEdits = tokenization . transitionEdits ;
342344 const context = transition . base . context ;
343345 const postContext = transition . final . context ;
344346 const inputTransform = transition . inputDistribution [ 0 ] . sample ;
@@ -349,13 +351,13 @@ export function determineSuggestionAlignment(
349351 const wordbreak = determineModelWordbreaker ( lexicalModel ) ;
350352
351353 // Is the token under construction newly-constructed / is there no pre-existing root?
352- if ( transition . preservationTransform && inputTransformMap ?. has ( 1 ) ) {
354+ if ( tokenization . taillessTrueKeystroke && inputTransformMap ?. has ( 1 ) ) {
353355 return {
354356 // If the new token is due to whitespace or due to a different input type
355357 // that would likely imply a tokenization boundary, infer 'new word' mode.
356358 // Apply any part of the context change that is not considered to be up
357359 // for correction.
358- predictionContext : models . applyTransform ( transition . preservationTransform , context ) ,
360+ predictionContext : models . applyTransform ( tokenization . taillessTrueKeystroke , context ) ,
359361 // As the word/token being corrected/predicted didn't originally exist,
360362 // there's no part of it to 'replace'. (Suggestions are applied to the
361363 // pre-transform state.)
@@ -379,7 +381,7 @@ export function determineSuggestionAlignment(
379381
380382 // Did the wordbreaker (or similar) append a blank token before the caret? If so,
381383 // preserve that by preventing corrections from triggering left-deletion.
382- if ( transition . final . tokenization . tail . isEmptyToken ) {
384+ if ( tokenization . tail . isEmptyToken ) {
383385 deleteLeft = 0 ;
384386 }
385387
@@ -453,10 +455,6 @@ export async function correctAndEnumerate(
453455 }
454456 }
455457
456- // No matter the prediction, once we know the root of the prediction, we'll always 'replace' the
457- // same amount of text. We can handle this before the big 'prediction root' loop.
458- const { predictionContext : predictionContext , deleteLeft } = determineSuggestionAlignment ( transition , lexicalModel ) ;
459-
460458 // TODO: Should we filter backspaces & whitespaces out of the transform distribution?
461459 // Ideally, the answer (in the future) will be no, but leaving it in right now may pose an issue.
462460
@@ -471,6 +469,13 @@ export async function correctAndEnumerate(
471469 // when no fat-finger data is available.
472470 if ( ! searchSpaces . find ( s => s . correctionsEnabled ) ) {
473471 const wordbreak = determineModelWordbreaker ( lexicalModel ) ;
472+ // The one true tokenization: no corrections permitted.
473+ const tokenization = transition . final . tokenization ;
474+
475+ // No matter the prediction, once we know the root of the prediction, we'll always 'replace' the
476+ // same amount of text. We can handle this before the big 'prediction root' loop.
477+ const { predictionContext : predictionContext , deleteLeft } = determineSuggestionAlignment ( transition , tokenization , lexicalModel ) ;
478+
474479 const predictionRoot = {
475480 sample : {
476481 insert : wordbreak ( transition . final . context ) ,
@@ -481,7 +486,7 @@ export async function correctAndEnumerate(
481486 } ;
482487
483488 const predictions = predictFromCorrections ( lexicalModel , [ predictionRoot ] , predictionContext ) ;
484- predictions . forEach ( ( entry ) => entry . preservationTransform = transition . preservationTransform ) ;
489+ predictions . forEach ( ( entry ) => entry . preservationTransform = tokenization . taillessTrueKeystroke ) ;
485490
486491 // Only one 'correction' / prediction root is allowed - the actual text.
487492 return {
@@ -501,6 +506,11 @@ export async function correctAndEnumerate(
501506 const searchSpace = searchSpaces . find ( s => s . spaceId == match . spaceId ) ;
502507 const tokenization = tokenizations . find ( t => t . spaceId == match . spaceId ) ;
503508
509+ // No matter the prediction, once we know the root of the prediction, we'll
510+ // always 'replace' the same amount of text. We can handle this before the
511+ // big 'prediction root' loop.
512+ const { predictionContext, deleteLeft } = determineSuggestionAlignment ( transition , tokenization , lexicalModel ) ;
513+
504514 // If our 'match' results in fully deleting the new token, reject it and try again.
505515 if ( match . matchSequence . length == 0 && match . inputSequence . length != 0 ) {
506516 continue ;
0 commit comments