Skip to content

Commit 13a3df3

Browse files
authored
Merge pull request #7037 from keymanapp/fix/web/pred-text-stability
fix(web): enhanced timer for prediction algorithm
2 parents 98dd9b6 + 814a492 commit 13a3df3

1 file changed

Lines changed: 140 additions & 5 deletions

File tree

common/web/lm-worker/src/correction/distance-modeler.ts

Lines changed: 140 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,6 @@ namespace correction {
526526
let searchSpace = this;
527527
let currentReturns: {[mapKey: string]: SearchNode} = {};
528528

529-
// JS measures time by the number of milliseconds since Jan 1, 1970.
530-
let timeStart = Date.now();
531529
let maxTime: number;
532530
if(waitMillis == 0) {
533531
maxTime = Infinity;
@@ -537,6 +535,136 @@ namespace correction {
537535
maxTime = waitMillis;
538536
}
539537

538+
/**
539+
* This inner class is designed to help the algorithm detect its active execution time.
540+
* While there's no official JS way to do this, we can approximate it by polling the
541+
* current system time (in ms) after each iteration of a short-duration loop. Unusual
542+
* spikes in system time for a single iteration is likely to indicate that an OS
543+
* context switch occurred at some point during the iteration's execution.
544+
*/
545+
class ExecutionTimer {
546+
/**
547+
* The system time when this instance was created.
548+
*/
549+
private start: number;
550+
551+
/**
552+
* Marks the system time at the start of the currently-running loop, as noted
553+
* by a call to the `startLoop` function.
554+
*/
555+
private loopStart: number;
556+
557+
private maxExecutionTime: number;
558+
private maxTrueTime: number;
559+
560+
private executionTime: number;
561+
562+
/**
563+
* Used to track intervals in which potential context swaps by the OS may
564+
* have occurred. Context switches generally seem to pause threads for
565+
* at least 16 ms, while we expect each loop iteration to complete
566+
* within just 1 ms. So, any possible context switch should have the
567+
* longest observed change in system time.
568+
*
569+
* See `updateOutliers` for more details.
570+
*/
571+
private largestIntervals: number[] = [0];
572+
573+
constructor(maxExecutionTime: number, maxTrueTime: number) {
574+
// JS measures time by the number of milliseconds since Jan 1, 1970.
575+
this.loopStart = this.start = Date.now();
576+
this.maxExecutionTime = maxExecutionTime;
577+
this.maxTrueTime = maxTrueTime;
578+
}
579+
580+
startLoop() {
581+
this.loopStart = Date.now();
582+
}
583+
584+
markIteration() {
585+
const now = Date.now();
586+
const delta = now - this.loopStart;
587+
this.executionTime += delta;
588+
589+
/**
590+
* Update the list of the three longest system-time intervals observed
591+
* for execution of a single loop iteration.
592+
*
593+
* Ignore any zero-ms length intervals; they'd make the logic much
594+
* messier than necessary otherwise.
595+
*/
596+
if(delta) {
597+
// If the currently-observed interval is longer than the shortest of the 3
598+
// previously-observed longest intervals, replace it.
599+
if(this.largestIntervals.length > 2 && delta > this.largestIntervals[0]) {
600+
this.largestIntervals[0] = delta;
601+
} else {
602+
this.largestIntervals.push(delta);
603+
}
604+
// Puts the list in ascending order. Shortest of the list becomes the head,
605+
// longest one the tail.
606+
this.largestIntervals.sort();
607+
608+
// Then, determine if we need to update our outlier-based tweaks.
609+
this.updateOutliers();
610+
}
611+
}
612+
613+
updateOutliers() {
614+
/* Base assumption: since each loop of the search should evaluate within ~1ms,
615+
* notably longer execution times are probably context switches.
616+
*
617+
* Base assumption: OS context switches generally last at least 16ms. (Based on
618+
* a window.setTimeout() usually not evaluating for at least
619+
* that long, even if set to 1ms.)
620+
*
621+
* To mitigate these assumptions: we'll track the execution time of every loop
622+
* iteration. If the longest observation somehow matches or exceeds the length of
623+
* the next two almost-longest observations twice over... we have a very strong
624+
* 'context switch' candidate.
625+
*
626+
* Or, in near-formal math/stats: we expect a very low variance in execution
627+
* time among the iterations of the search's loops. With a very low variance,
628+
* ANY significant proportional spikes in execution time are outliers - outliers
629+
* likely caused by an OS context switch.
630+
*
631+
* Rather than do intensive math, we use a somewhat lazy approach below that
632+
* achieves the same net results given our assumptions, even when relaxed somewhat.
633+
*
634+
* The logic below relaxes the base assumptions a bit to be safe:
635+
* - [2ms, 2ms, 8ms] will cause 8ms to be seen as an outlier.
636+
* - [2ms, 3ms, 10ms] will cause 10ms to be seen as an outlier.
637+
*
638+
* Ideally:
639+
* - [1ms, 1ms, 4ms] will view 4ms as an outlier.
640+
*
641+
* So we can safely handle slightly longer average intervals and slightly shorter
642+
* OS context-switch time intervals.
643+
*/
644+
if(this.largestIntervals.length > 2) {
645+
// Precondition: the `largestIntervals` array is sorted in ascending order.
646+
// Shortest entry is at the head, longest at the tail.
647+
if(this.largestIntervals[2] >= 2 * (this.largestIntervals[0] + this.largestIntervals[1])) {
648+
this.executionTime -= this.largestIntervals[2];
649+
this.largestIntervals.pop();
650+
}
651+
}
652+
}
653+
654+
shouldTimeout(): boolean {
655+
const now = Date.now();
656+
if(this.start - now > this.maxTrueTime) {
657+
return true;
658+
}
659+
660+
return this.executionTime > this.maxExecutionTime;
661+
}
662+
663+
resetOutlierCheck() {
664+
this.largestIntervals = [];
665+
}
666+
}
667+
540668
class BatchingAssistant {
541669
currentCost = Number.MIN_SAFE_INTEGER;
542670
entries: SearchResult[] = [];
@@ -582,17 +710,22 @@ namespace correction {
582710

583711
let batcher = new BatchingAssistant();
584712

713+
const timer = new ExecutionTimer(maxTime*3, maxTime);
714+
585715
// Stage 1 - if we already have extracted results, build a queue just for them and iterate over it first.
586716
let returnedValues = Object.values(this.returnedValues);
587717
if(returnedValues.length > 0) {
588718
let preprocessedQueue = new models.PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, returnedValues);
589719

590720
// Build batches of same-cost entries.
721+
timer.startLoop();
591722
while(preprocessedQueue.count > 0) {
592723
let entry = preprocessedQueue.dequeue();
593724
let batch = batcher.checkAndAdd(entry);
725+
timer.markIteration();
594726

595727
if(batch) {
728+
// Do not track yielded time.
596729
yield batch;
597730
}
598731
}
@@ -601,22 +734,24 @@ namespace correction {
601734
// finalize the last preprocessed group without issue.
602735
let batch = batcher.tryFinalize();
603736
if(batch) {
737+
// Do not track yielded time.
604738
yield batch;
605739
}
606740
}
607741

608742
// Stage 2: the fun part; actually searching!
743+
timer.resetOutlierCheck();
744+
timer.startLoop();
609745
let timedOut = false;
610746
do {
611747
let newResult: PathResult;
612748

613749
// Search for a 'complete' path, skipping all partial paths as long as time remains.
614750
do {
615751
newResult = this.handleNextNode();
752+
timer.markIteration();
616753

617-
// (Naive) timeout check!
618-
let now = Date.now();
619-
if(now - timeStart > maxTime) {
754+
if(timer.shouldTimeout()) {
620755
timedOut = true;
621756
}
622757
} while(!timedOut && newResult.type == 'intermediate')

0 commit comments

Comments
 (0)