diff --git a/.cursor/rules.md b/.cursor/rules.md index 21e70010..bd4204c3 100644 --- a/.cursor/rules.md +++ b/.cursor/rules.md @@ -22,6 +22,8 @@ - Before marking any task complete: `pnpm typecheck && pnpm lint && pnpm run -s format:check && pnpm test` must pass; coverage guard stays green. - Update docs (plan, QA matrix, behaviour specs) as part of completion. +- Update docs (plan, QA matrix, behaviour specs) as part of completion. +- If a PR changes behaviour but does not update `docs/system_principles.md` and relevant guides/tests, CI should fail. 3. Use the Questions Log @@ -47,7 +49,7 @@ 7. Tests & docs as deliverables - Add unit/integration tests for new logic and update `docs/qa/README.md` test mapping. -- Keep `docs/lm_behavior.md` and `docs/implementation.md` in sync with behaviour changes. +- Keep `docs/guide/reference/lm-behavior.md` and `docs/implementation.md` in sync with behaviour changes. 8. Communication discipline @@ -98,7 +100,7 @@ 16. Docs & Questions discipline -- Update `docs/implementation.md`, `docs/lm_behavior.md`, and `docs/qa/README.md` for any behaviour change. +- Update `docs/implementation.md`, `docs/guide/reference/lm-behavior.md`, and `docs/qa/README.md` for any behaviour change. - Log uncertainties in `docs/questions.md`; proceed on safe defaults; revisit once answered. 17. Observability & safety @@ -114,5 +116,5 @@ - Plan and task order: `docs/implementation.md` - QA matrix and CI gates: `docs/qa/README.md` -- LM policy/behaviour: `docs/lm_behavior.md` +- LM policy/behaviour: `docs/guide/reference/lm-behavior.md` - Questions log: `docs/questions.md` diff --git a/.cursor/rules/doc_links.mdc b/.cursor/rules/doc_links.mdc index 4f65134b..9ac5522a 100644 --- a/.cursor/rules/doc_links.mdc +++ b/.cursor/rules/doc_links.mdc @@ -11,7 +11,7 @@ Links to critical files by name for Cursor memory and quick reference. 
- [Project Structure](docs/project_structure.md) - [Glossary](context/glossary.md) - [Project Overview](context/project_overview.md) -- [System Principles](context/system_principles.md) +- [System Principles](docs/system_principles.md) ## Key Directories - [Core Logic](core/) diff --git a/.cursor/rules/principles.mdc b/.cursor/rules/principles.mdc new file mode 100644 index 00000000..5b7a4593 --- /dev/null +++ b/.cursor/rules/principles.mdc @@ -0,0 +1,47 @@ +--- +alwaysApply: true +--- + + +# Principles Snapshot + +Human Flow & Dignity +- Human-first agency: auto-apply within band; no accept gesture; no expansion. +- Flow & rhythm: micro-corrections; defer heavy work during bursts. +- Low cognitive load: no suggestion lists; subtle underline/highlight; debug opt-in. +- Accessibility: respect reduced motion; SR announces; keyboard-first. + +Safety, Trust & Integrity +- Caret-safe, non-undoing: never edit at/after caret; band-only; no undo entries. +- Local-first privacy: prefer local; remote off unless opted in; degrade gracefully; no text persistence. +- Explainability: show reasons, tiers, and truncations; toggleable explainers. +- Fail-soft: LM errors → rules-only; single-flight + abort; drop stale. + +Adaptive Intelligence & Execution +- Context-minimal: smallest window; allow control JSON; outputs sanitized & clamped. +- Single-flight orchestration: one active gen per band; abort on input. +- Device-tier progressive: detect capabilities → tune cadence/tokens. +- Testable/observable: gates must pass; logs for merges/aborts/tiers. + +Collaboration & Delivery +- Plan order & Questions: execute tasks in plan order; capture clarifications in `docs/questions.md`. +- Green gates: typecheck/lint/format/test must pass before merge. + +See `docs/system_principles.md` for behaviours and examples. 
+ diff --git a/.vscode/settings.json b/.vscode/settings.json index 4f0e93b7..885b000a 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,5 +5,29 @@ // • WHAT ▸ Custom dictionary entries for cSpell // • WHY ▸ Silence false positives for product keywords // • HOW ▸ Editor-only; no runtime impact - "cSpell.words": ["mindtyper", "mindtype", "gramm"] + "cSpell.words": [ + "mindtyper", + "mindtype", + "gramm", + "behaviour", + "skimmable", + "guillemets", + "QXXX", + "desaturation", + "CARETSAFE", + "precsson", + "mindtypr", + "tooll", + "emdashes", + "lstens", + "cooldown", + "Qwen", + "WCAG", + "autoplay", + "testids", + "Workerize", + "cbindgen", + "sandboxed", + "webgpu" + ] } diff --git a/config/defaultThresholds.ts b/config/defaultThresholds.ts index b9f818a5..a6c7dc1d 100644 --- a/config/defaultThresholds.ts +++ b/config/defaultThresholds.ts @@ -13,14 +13,14 @@ • WHY ▸ Harmonises behaviour across engines/UX • HOW ▸ Imported by engines and UI helpers */ -export const SHORT_PAUSE_MS = 500; // aligned with plan/docs +export const SHORT_PAUSE_MS = 300; // perceptual rhythm default per principles export const LONG_PAUSE_MS = 2000; // aligned with plan/docs export const MAX_SWEEP_WINDOW = 80; // chars behind caret // Mutable runtime-configurable thresholds (with safe defaults) let typingTickMs = 75; // 60–90 ms sweet spot -let minValidationWords = 3; -let maxValidationWords = 8; +let minValidationWords = 5; +let maxValidationWords = 5; // Accessors to support live tuning (demo controls) export function getTypingTickMs(): number { diff --git a/core/diffusionController.ts b/core/diffusionController.ts index 780a35bc..9bca16f8 100644 --- a/core/diffusionController.ts +++ b/core/diffusionController.ts @@ -16,8 +16,10 @@ import { getMaxValidationWords, } from '../config/defaultThresholds'; import { tidySweep } from '../engines/tidySweep'; +import { replaceRange } from '../utils/diff'; import type { LMAdapter } from './lm/types'; import { 
renderValidationBand, renderHighlight } from '../ui/highlighter'; +import { createLogger } from './logger'; export interface DiffusionState { text: string; @@ -37,7 +39,14 @@ export interface BandPolicy { // Context7 docs: Intl.Segmenter provides granularity: 'word' for word-like segments // The isWordLike property indicates segments that are actual words vs punctuation/spaces export function createDiffusionController(policy?: BandPolicy, _lmAdapter?: LMAdapter) { - const seg = new Intl.Segmenter(undefined, { granularity: 'word' }); + // Safari/older browsers: Intl.Segmenter may be missing or partial. Provide a fallback. + let seg: Intl.Segmenter | null = null; + try { + seg = new Intl.Segmenter(undefined, { granularity: 'word' }); + } catch { + seg = null; + } + const log = createLogger('diffusion'); let state: DiffusionState = { text: '', caret: 0, frontier: 0 }; // Throttle rendering to avoid UI storms (esp. Safari). ~60fps ceiling. @@ -50,6 +59,19 @@ export function createDiffusionController(policy?: BandPolicy, _lmAdapter?: LMAd lastRenderMs = now; const renderRange = policy ? 
policy.computeRenderRange(state) : bandRange(); renderValidationBand(renderRange); + // Emit selection snapshot for LM inspector/debug + try { + const { start, end } = renderRange; + const ctxBefore = state.text.slice(Math.max(0, start - 60), start); + const span = state.text.slice(start, end); + const ctxAfter = state.text.slice(end, Math.min(state.text.length, end + 60)); + (globalThis as unknown as Record).__mtLastLMSelection = { + band: renderRange, + span, + ctxBefore, + ctxAfter, + }; + } catch {} } } @@ -63,22 +85,38 @@ export function createDiffusionController(policy?: BandPolicy, _lmAdapter?: LMAd state.text = text; state.caret = caret; clampFrontier(); + log.debug('update', { caret, frontier: state.frontier, textLen: text.length }); maybeRender(); } + function iterateWordSegments(slice: string): Array<{ index: number; segment: string }> { + const out: Array<{ index: number; segment: string }> = []; + if (seg) { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + for (const s of (seg as any).segment(slice)) { + if ((s as { isWordLike?: boolean }).isWordLike) + out.push({ index: s.index, segment: s.segment }); + } + return out; + } + // Fallback: unicode word run matches + const re = /[\p{L}\p{N}_]+/gu; + let m: RegExpExecArray | null; + while ((m = re.exec(slice))) { + out.push({ index: m.index, segment: m[0] }); + } + return out; + } + function bandRange(): { start: number; end: number } { // Compute a range covering min..max words behind caret, starting at frontier const slice = state.text.slice(state.frontier, state.caret); - const words: Array<{ start: number; end: number }> = []; - for (const s of seg.segment(slice)) { - // Cast necessary due to incomplete TypeScript DOM types for Intl.Segmenter - // isWordLike property exists but not in TS lib DOM types yet - if ((s as { isWordLike?: boolean }).isWordLike) { - const start = state.frontier + s.index; - const end = start + s.segment.length; - words.push({ start, end }); - } - } + const 
words: Array<{ start: number; end: number }> = iterateWordSegments(slice).map( + (s) => ({ + start: state.frontier + s.index, + end: state.frontier + s.index + s.segment.length, + }), + ); if (words.length === 0) return { start: state.frontier, end: state.caret }; const minWords = getMinValidationWords(); const maxWords = getMaxValidationWords(); @@ -90,13 +128,11 @@ export function createDiffusionController(policy?: BandPolicy, _lmAdapter?: LMAd function nextWordRange(): { start: number; end: number } | null { if (state.frontier >= state.caret) return null; const slice = state.text.slice(state.frontier, state.caret); - for (const s of seg.segment(slice)) { - // Cast necessary due to incomplete TypeScript DOM types for Intl.Segmenter - if ((s as { isWordLike?: boolean }).isWordLike) { - const start = state.frontier + s.index; - const end = start + s.segment.length; - if (end <= state.caret) return { start, end }; - } + const segments = iterateWordSegments(slice); + for (const s of segments) { + const start = state.frontier + s.index; + const end = start + s.segment.length; + if (end <= state.caret) return { start, end }; } return null; } @@ -106,8 +142,28 @@ export function createDiffusionController(policy?: BandPolicy, _lmAdapter?: LMAd if (!r) return; const res = tidySweep({ text: state.text, caret: state.caret, hint: r }); if (res.diff) { - renderHighlight({ start: res.diff.start, end: res.diff.end }); - state.frontier = Math.max(state.frontier, res.diff.end); + // Do not log user text per privacy policy + log.debug('diff', { + start: res.diff.start, + end: res.diff.end, + }); + // Apply the diff to local state for consistency with host + try { + const updated = replaceRange( + state.text, + res.diff.start, + res.diff.end, + res.diff.text, + state.caret, + ); + state.text = updated; + } catch { + // If safety check fails, skip applying but still advance to avoid stalls + log.warn('replaceRange failed (safety)', { caret: state.caret }); + } + renderHighlight({ 
start: res.diff.start, end: res.diff.end, text: res.diff.text }); + const newEnd = res.diff.start + res.diff.text.length; + state.frontier = Math.max(state.frontier, newEnd); } else { // Even without a replacement, consider the word validated this tick state.frontier = Math.max(state.frontier, r.end); diff --git a/core/lm/policy.ts b/core/lm/policy.ts index e11d75ce..1581d679 100644 --- a/core/lm/policy.ts +++ b/core/lm/policy.ts @@ -36,6 +36,7 @@ export interface SpanAndPrompt { prompt: string | null; span: string | null; maxNewTokens: number; + controlJson: string; } export function selectSpanAndPrompt( @@ -44,27 +45,50 @@ export function selectSpanAndPrompt( cfg: LMBehaviorConfig = defaultLMBehaviorConfig, ): SpanAndPrompt { const band = computeSimpleBand(text, caret); - if (!band) return { band: null, prompt: null, span: null, maxNewTokens: 0 }; + if (!band) + return { band: null, prompt: null, span: null, maxNewTokens: 0, controlJson: '{}' }; const span = text.slice(band.start, band.end); if (span.length < cfg.minSpanChars) - return { band: null, prompt: null, span: null, maxNewTokens: 0 }; + return { band: null, prompt: null, span: null, maxNewTokens: 0, controlJson: '{}' }; if (span.length > cfg.maxSpanChars) - return { band: null, prompt: null, span: null, maxNewTokens: 0 }; + return { band: null, prompt: null, span: null, maxNewTokens: 0, controlJson: '{}' }; if (cfg.enforceWordBoundaryAtEnd && /\w$/.test(span)) { - return { band: null, prompt: null, span: null, maxNewTokens: 0 }; + return { band: null, prompt: null, span: null, maxNewTokens: 0, controlJson: '{}' }; } const ctxLeft = Math.max(0, band.start - cfg.contextLeftChars); const ctxRight = Math.min(text.length, band.end + cfg.contextRightChars); const ctxBefore = text.slice(ctxLeft, band.start); const ctxAfter = text.slice(band.end, ctxRight); - const instruction = - 'Correct ONLY the Span. Do not add explanations or extra words. 
Return just the corrected Span.'; - const prompt = `${instruction}\nContext before: «${ctxBefore}»\nSpan: «${span}»\nContext after: «${ctxAfter}»`; + const instruction = [ + 'Correct ONLY the Span. Return the corrected Span text exactly.', + '- No explanations or extra words', + '- No quotes or labels', + '- Keep meaning and style; fix grammar, clarity, and punctuation', + '- Keep length close to the Span (do not expand beyond it)', + ].join('\n'); + const control = { + mode: 'grammar_only', + tone: { formality: 0.0, warmth: 0.0, directness: 0.0 }, + caps: { maxRewriteChars: cfg.maxSpanChars }, + safety: { noExternalKnowledge: true }, + v: 1, + }; + const controlJson = JSON.stringify(control, null, 2); + const prompt = `${instruction}\n\nCONTROL (JSON): «${controlJson}»\nContext before: «${ctxBefore}»\nSpan: «${span}»\nContext after: «${ctxAfter}»`; const maxNewTokens = Math.min( Math.ceil(span.length * cfg.maxTokensFactor) + 6, cfg.maxTokensCap, ); - return { band, prompt, span, maxNewTokens }; + // Expose components for UI debug + (globalThis as unknown as Record).__mtLastLMSelection = { + band, + prompt, + span, + ctxBefore, + ctxAfter, + controlJson, + }; + return { band, prompt, span, maxNewTokens, controlJson }; } export function postProcessLMOutput( diff --git a/core/lm/transformersClient.ts b/core/lm/transformersClient.ts index 2b448dd0..fd39647c 100644 --- a/core/lm/transformersClient.ts +++ b/core/lm/transformersClient.ts @@ -23,14 +23,18 @@ export function detectBackend(): LMCapabilities['backend'] { } } catch {} try { - // In browsers without WebGPU but with WASM SIMD, a WASM backend may be used by libs - return 'wasm'; + // In browsers without WebGPU but with WebAssembly (SIMD/threads optional), use WASM + if (typeof WebAssembly !== 'undefined') return 'wasm'; } catch {} return 'cpu'; } export function createTransformersAdapter(runner: TokenStreamer): LMAdapter { let aborted = false; + let inflight: Promise | null = null; + let resolveInflight: (() => 
void) | null = null; + let lastMergeAt = 0; + const COOLDOWN_MS = 160; let caps: LMCapabilities | null = null; return { @@ -43,13 +47,35 @@ export function createTransformersAdapter(runner: TokenStreamer): LMAdapter { aborted = true; }, async *stream(params: LMStreamParams): AsyncIterable { + // enforce cooldown + const now = Date.now(); + const since = now - lastMergeAt; + if (since < COOLDOWN_MS) { + await new Promise((r) => setTimeout(r, COOLDOWN_MS - since)); + } + // single‑flight: cancel previous + aborted = true; + await inflight?.catch(() => {}); aborted = false; + const { text, band } = params; const prompt = text.slice(band.start, band.end); const stream = runner.generateStream({ prompt }); - for await (const chunk of stream) { - if (aborted) return; - yield chunk; + + // create a completion promise resolved when this stream finishes + inflight = new Promise((resolve) => { + resolveInflight = resolve; + }); + + try { + for await (const chunk of stream) { + if (aborted) return; + yield chunk; + } + lastMergeAt = Date.now(); + } finally { + resolveInflight?.(); + resolveInflight = null; } }, }; diff --git a/core/lm/transformersRunner.ts b/core/lm/transformersRunner.ts index 82ca9963..70ec1a20 100644 --- a/core/lm/transformersRunner.ts +++ b/core/lm/transformersRunner.ts @@ -29,7 +29,7 @@ export interface QwenRunnerOptions { * on first use to avoid bloating initial bundles. 
*/ type GeneratorFn = (( - messages: unknown[], + input: unknown, opts: Record, ) => Promise) & { tokenizer: unknown; @@ -40,99 +40,189 @@ type LoadedGenerator = { TextStreamer: new (tokenizer: unknown, opts: Record) => unknown; }; -export function createQwenTokenStreamer(options?: QwenRunnerOptions): TokenStreamer { - let generatorPromise: Promise | null = null; +// ────────────────────────────────────────────────────────────── +// Singleton loader (locks to first options seen for the session) +// ────────────────────────────────────────────────────────────── +let singletonGenerator: Promise | null = null; +let singletonInitOptions: QwenRunnerOptions | undefined; + +async function loadGeneratorSingleton( + options?: QwenRunnerOptions, +): Promise { + if (singletonGenerator) return singletonGenerator; + singletonInitOptions = options; const modelId = options?.modelId ?? 'onnx-community/Qwen2.5-0.5B-Instruct'; - const maxNewTokensDefault = options?.maxNewTokens ?? 64; - - async function loadGenerator() { - if (generatorPromise) return generatorPromise; - generatorPromise = (async (): Promise => { - // Dynamic import keeps core decoupled from heavy deps - const { pipeline, TextStreamer, env } = (await import( - '@huggingface/transformers' - )) as unknown as { - pipeline: ( - task: string, - model: string, - options: Record, - ) => Promise; - TextStreamer: new (tokenizer: unknown, opts: Record) => unknown; - env: Record; - }; - - // Environment configuration for self‑hosting and fallbacks - if (options?.localModelPath) { - (env as Record).localModelPath = options.localModelPath; - } - // Ensure exactly one of local/remote is enabled - if (options?.localOnly) { - (env as Record).allowLocalModels = true; - (env as Record).allowRemoteModels = false as unknown as never; - } else { - (env as Record).allowLocalModels = false as unknown as never; - (env as Record).allowRemoteModels = true as unknown as never; - } - if (options?.localOnly && options?.wasmPaths) { - const e = 
env as unknown as { - backends?: { onnx?: { wasm?: { wasmPaths?: string } } }; - } & Record; - e.backends = e.backends ?? {}; - e.backends.onnx = e.backends.onnx ?? { wasm: {} }; - e.backends.onnx.wasm = e.backends.onnx.wasm ?? {}; - e.backends.onnx.wasm.wasmPaths = options.wasmPaths; + singletonGenerator = (async (): Promise => { + // Dynamic import keeps core decoupled from heavy deps + const { pipeline, TextStreamer, env } = (await import( + '@huggingface/transformers' + )) as unknown as { + pipeline: ( + task: string, + model: string, + options: Record, + ) => Promise; + TextStreamer: new (tokenizer: unknown, opts: Record) => unknown; + env: Record; + }; + + const opts = singletonInitOptions; + // Environment configuration for self‑hosting and fallbacks + if (opts?.localModelPath) { + (env as Record).localModelPath = opts.localModelPath; + } + // Ensure exactly one of local/remote is enabled + if (opts?.localOnly) { + (env as Record).allowLocalModels = true; + (env as Record).allowRemoteModels = false as unknown as never; + } else { + (env as Record).allowLocalModels = false as unknown as never; + (env as Record).allowRemoteModels = true as unknown as never; + } + if (opts?.localOnly && opts?.wasmPaths) { + const e = env as unknown as { + backends?: { onnx?: { wasm?: { wasmPaths?: string } } }; + } & Record; + e.backends = e.backends ?? {}; + e.backends.onnx = e.backends.onnx ?? { wasm: {} }; + e.backends.onnx.wasm = e.backends.onnx.wasm ?? {}; + e.backends.onnx.wasm.wasmPaths = opts.wasmPaths; + } + + const backend = detectBackend(); + const device = backend === 'webgpu' ? 'webgpu' : backend === 'wasm' ? 
'wasm' : 'cpu'; + + const gen = await pipeline('text-generation', modelId, { + dtype: 'q4', + device, + } as Record); + + console.info('[LM] ready', { + modelId, + backend, + device, + localOnly: opts?.localOnly, + }); + + return { gen, TextStreamer } as LoadedGenerator; + })(); + return singletonGenerator; +} + +export function createQwenTokenStreamer(options?: QwenRunnerOptions): TokenStreamer { + // Default to local-only unless explicitly disabled per session + const localOnlyDefault = options?.localOnly ?? true; + // Device-tier default token caps + const backend = detectBackend(); + const tierDefaultMaxTokens = + options?.maxNewTokens ?? (backend === 'webgpu' ? 48 : backend === 'wasm' ? 24 : 16); + const maxNewTokensDefault = tierDefaultMaxTokens; + + return { + async *generateStream(input: { prompt: string; maxNewTokens?: number }) { + const { gen, TextStreamer } = await loadGeneratorSingleton({ + ...options, + localOnly: localOnlyDefault, + }); + + // Simple async queue to yield chunks as they arrive (word-by-word) + const chunks: string[] = []; + let resolver: (() => void) | null = null; + let closed = false; + let accum = ''; + + const boundaryRegex = /[\s.,!?;:—"'”’)\]\}]/; + function isBoundaryChar(ch: string): boolean { + return boundaryRegex.test(ch); } - const backend = detectBackend(); - const device = - backend === 'webgpu' ? 'webgpu' : backend === 'wasm' ? 'wasm' : 'cpu'; + function pushChunk(s: string) { + if (!s) return; + chunks.push(s); + try { + const g = globalThis as unknown as { __mtLastLMChunks?: string[] }; + g.__mtLastLMChunks = (g.__mtLastLMChunks ?? 
[]).concat(s).slice(-10); + } catch {} + if (resolver) { + const r = resolver; + resolver = null; + r(); + } + } - const gen = await pipeline('text-generation', modelId, { - dtype: 'q4', - device, - } as Record); + function flushWords(final: boolean) { + // Emit segments ending at a boundary char (e.g., space or punctuation) + for (let i = 0; i < accum.length; i++) { + if (isBoundaryChar(accum[i])) { + const emit = accum.slice(0, i + 1); + pushChunk(emit); + accum = accum.slice(i + 1); + i = -1; // restart scan on the shortened buffer + } + } + if (final && accum) { + pushChunk(accum); + accum = ''; + } + } - console.info('[LM] ready', { - modelId, - backend, - device, - localOnly: options?.localOnly, - }); + function close() { + closed = true; + if (resolver) { + const r = resolver; + resolver = null; + r(); + } + } - return { gen, TextStreamer } as LoadedGenerator; - })(); - return generatorPromise; - } + async function waitForChunk(): Promise { + if (chunks.length || closed) return; + return new Promise((r) => { + resolver = r; + }); + } - return { - async *generateStream(input: { prompt: string; maxNewTokens?: number }) { - const { gen, TextStreamer } = await loadGenerator(); - let buffer = ''; + let lastEmitAt = 0; + const COALESCE_MS = 25; const streamer = new TextStreamer(gen.tokenizer, { skip_prompt: true, skip_special_tokens: true, callback_function: (text: string) => { - buffer += text; + accum += text; + const now = Date.now(); + if (now - lastEmitAt >= COALESCE_MS) { + flushWords(false); + lastEmitAt = now; + } }, }); - const messages = [ - { role: 'system', content: 'You correct grammar and clarity of text.' }, - { role: 'user', content: input.prompt }, - ]; - - await gen(messages as unknown[], { - max_new_tokens: input.maxNewTokens ?? maxNewTokensDefault, - do_sample: false, - streamer, - }); + try { + await gen(input.prompt as unknown as string, { + max_new_tokens: input.maxNewTokens ?? 
maxNewTokensDefault, + do_sample: false, + streamer, + }); + } finally { + // Flush any remainder and close the stream + flushWords(true); + close(); + } - // Flush as streaming chunks (~8 chars) to simulate token cadence - const CHUNK = 8; - for (let i = 0; i < buffer.length; i += CHUNK) { - yield buffer.slice(i, i + CHUNK); + while (!closed || chunks.length) { + if (chunks.length) { + yield chunks.shift() as string; + } else { + await waitForChunk(); + } } }, } satisfies TokenStreamer; } + +// Test-only helper to reset the singleton between specs +export function __resetQwenSingletonForTests() { + singletonGenerator = null; + singletonInitOptions = undefined; +} diff --git a/core/sweepScheduler.ts b/core/sweepScheduler.ts index 38f07449..dd329503 100644 --- a/core/sweepScheduler.ts +++ b/core/sweepScheduler.ts @@ -52,6 +52,7 @@ export function createSweepScheduler( return; } lastEvent = ev; + log.debug('onEvent', { caret: ev.caret, textLen: ev.text.length }); diffusion.update(ev.text, ev.caret); if (timer) clearTimeout(timer); // schedule pause catch-up @@ -61,6 +62,7 @@ export function createSweepScheduler( typingInterval = setInterval(() => { try { diffusion.tickOnce(); + log.trace('tickOnce'); } catch { // fail-safe: stop streaming to avoid runaway loops clearIntervals(); @@ -82,6 +84,7 @@ export function createSweepScheduler( ) { await diffusion.catchUp(); steps += 1; + log.debug('catchUp step', { steps, frontier: diffusion.getState().frontier }); } } catch { // swallow to keep UI responsive diff --git a/core/typingMonitor.ts b/core/typingMonitor.ts index 7518070d..f70f3f39 100644 --- a/core/typingMonitor.ts +++ b/core/typingMonitor.ts @@ -25,7 +25,14 @@ export interface TypingMonitor { emit(event: TypingEvent): void; } +import { createLogger, getLoggerConfig } from './logger'; + export function createTypingMonitor(): TypingMonitor { + // Optional debug logger + let log: import('./logger').Logger | null = null; + try { + if (getLoggerConfig().enabled) log = 
createLogger('monitor'); + } catch {} const listeners = new Set<(event: TypingEvent) => void>(); return { on(listener) { @@ -33,6 +40,11 @@ export function createTypingMonitor(): TypingMonitor { return () => listeners.delete(listener); }, emit(event) { + log?.debug('emit', { + caret: event.caret, + textLen: event.text.length, + atMs: event.atMs, + }); for (const listener of listeners) listener(event); }, }; diff --git a/coverage/index.html b/coverage/index.html index d6a4f95b..287c19c2 100644 --- a/coverage/index.html +++ b/coverage/index.html @@ -23,30 +23,30 @@

All files

- 97.67% + 97.8% Statements - 1220/1249 + 1428/1460
- 90.23% + 90.16% Branches - 231/256 + 275/305
- 94.28% + 95% Functions - 66/70 + 76/80
- 97.67% + 97.8% Lines - 1220/1249 + 1428/1460
@@ -95,32 +95,32 @@

All files

core - -
+ +
- 97.39% - 412/423 - 91.01% - 81/89 - 96.66% - 29/30 - 97.39% - 412/423 + 99.79% + 493/494 + 92.3% + 96/104 + 100% + 33/33 + 99.79% + 493/494 core/lm - -
+ +
- 94.44% - 306/324 - 85.18% - 46/54 - 81.25% - 13/16 - 94.44% - 306/324 + 93.31% + 433/464 + 85.22% + 75/88 + 82.6% + 19/23 + 93.31% + 433/464 @@ -161,7 +161,7 @@

All files

+ + + + diff --git a/web-demo/v2/index.html b/web-demo/v2/index.html new file mode 100644 index 00000000..68dda252 --- /dev/null +++ b/web-demo/v2/index.html @@ -0,0 +1,14 @@ + + + + + + MindType Demo v2 (Noisy Typing Tester) + + +
+ + + + + diff --git a/web-demo/v2/main.tsx b/web-demo/v2/main.tsx new file mode 100644 index 00000000..cd23ea80 --- /dev/null +++ b/web-demo/v2/main.tsx @@ -0,0 +1,11 @@ +import { StrictMode } from 'react'; +import { createRoot } from 'react-dom/client'; +import Tester from './tester/App'; + +createRoot(document.getElementById('root')!).render( + + + , +); + + diff --git a/web-demo/v2/tester/App.tsx b/web-demo/v2/tester/App.tsx new file mode 100644 index 00000000..55ee998f --- /dev/null +++ b/web-demo/v2/tester/App.tsx @@ -0,0 +1,517 @@ +import { useEffect, useMemo, useRef, useState } from 'react'; +import { boot } from '../../../index'; +import { replaceRange } from '../../../utils/diff'; +import { setLoggerConfig } from '../../../core/logger'; +import { + getMinValidationWords, + getMaxValidationWords, + setValidationBandWords, + setTypingTickMs, +} from '../../../config/defaultThresholds'; + +type LayoutName = 'qwerty' | 'qwertz'; + +const LAYOUTS: Record = { + qwerty: ['`1234567890-=', 'qwertyuiop[]', "asdfghjkl;'", 'zxcvbnm,./'], + qwertz: ['`1234567890-=', 'qwertzuiop[]', "asdfghjkl;'", 'yxcvbnm,./'], +}; + +function buildAdjacency(rows: string[]): Record { + const map: Record = {}; + const grid = rows.map((r) => r.split('')); + for (let r = 0; r < grid.length; r++) { + for (let c = 0; c < grid[r].length; c++) { + const ch = grid[r][c].toLowerCase(); + const neighbors: string[] = []; + for (let dr = -1; dr <= 1; dr++) { + for (let dc = -1; dc <= 1; dc++) { + if (dr === 0 && dc === 0) continue; + const rr = r + dr; + const cc = c + dc; + if (rr >= 0 && rr < grid.length && cc >= 0 && cc < grid[rr].length) { + neighbors.push(grid[rr][cc].toLowerCase()); + } + } + } + map[ch] = Array.from(new Set(neighbors)); + } + } + return map; +} + +function pickAdjacentChar(ch: string, layout: LayoutName, rng: () => number): string { + const rows = LAYOUTS[layout]; + const adj = buildAdjacency(rows); + const lower = ch.toLowerCase(); + const opts = adj[lower]; + if (!opts 
|| opts.length === 0) return ch; + const pick = opts[Math.floor(rng() * opts.length)]; + return ch === lower ? pick : pick.toUpperCase(); +} + +// Approachable, long-form sample text +const PASSAGE = ( + 'This is a simple product note about how MindTyper helps you compose clear text. ' + + 'It spots little slips like doubled spaces, missing letters, or stray punctuation, ' + + 'and quietly straightens them while you continue typing. You can pause, think, and ' + + 'resume without losing your place. The goal is not to be poetic, just helpful and ' + + 'calm. As you type, the engine watches recent words and cleans them up in a way that ' + + 'feels natural, like a careful editor sitting beside you. The demo below simulates a ' + + 'real keyboard session with bursts and short breaks, so you can see the cleanup catch ' + + 'up to the words you just wrote. Feel free to tweak the sliders to adjust speed, error ' + + 'rate, and rhythm. When the passage reaches the end, it clears and starts again so you ' + + 'can observe the behavior from the very first characters.' 
+); + +export default function Tester() { + const [tickMs, setTickMs] = useState(90); + const [errorRate, setErrorRate] = useState(0.08); + const [jitterMs, setJitterMs] = useState(20); + const [burstiness, setBurstiness] = useState(0.5); // 0..1 + const [pauseWeight, setPauseWeight] = useState(1.5); // multiplier at spaces/punct + const [layout, setLayout] = useState('qwerty'); + const [autoPlay, setAutoPlay] = useState(true); + const [text, setText] = useState(''); + const [minBand, setMinBand] = useState(getMinValidationWords()); + const [maxBand, setMaxBand] = useState(getMaxValidationWords()); + const [bandRange, setBandRange] = useState<{ start: number; end: number } | null>(null); + const [debugOn, setDebugOn] = useState(false); + + const caretRef = useRef(0); + const textRef = useRef(''); + const simTimeoutRef = useRef(null); + const srcIndexRef = useRef(0); + const burstLeftRef = useRef(0); + + const pipeline = useMemo( + () => + boot({ + security: { + isSecure: () => false, + isIMEComposing: () => false, + }, + }), + [], + ); + + useEffect(() => { + pipeline.start(); + try { + const stored = localStorage.getItem('mt.debug'); + if (stored === 'true') { + setLoggerConfig({ enabled: true, level: 'debug' }); + setDebugOn(true); + console.info('[v2] debug logging enabled'); + } + } catch {} + return () => pipeline.stop(); + }, [pipeline]); + + useEffect(() => { + (window as any).mt = pipeline; + return () => delete (window as any).mt; + }, [pipeline]); + + // Persist key controls between visits + useEffect(() => { + try { + localStorage.setItem('mt.v2.tickMs', String(tickMs)); + localStorage.setItem('mt.v2.errorRate', String(errorRate)); + localStorage.setItem('mt.v2.jitterMs', String(jitterMs)); + localStorage.setItem('mt.v2.burstiness', String(burstiness)); + localStorage.setItem('mt.v2.pauseWeight', String(pauseWeight)); + localStorage.setItem('mt.v2.layout', layout); + } catch {} + }, [tickMs, errorRate, jitterMs, burstiness, pauseWeight, layout]); + + // 
Drive core tick and band size + useEffect(() => { + setTypingTickMs(tickMs); + }, [tickMs]); + useEffect(() => { + setValidationBandWords(minBand, maxBand); + }, [minBand, maxBand]); + + // Listen for band/highlight events and apply diffs in the textarea + useEffect(() => { + const onBand = (e: Event) => { + const { start, end } = (e as CustomEvent).detail as { start: number; end: number }; + setBandRange({ start, end }); + }; + const onHighlight = (e: Event) => { + const { start, end, text: diffText } = (e as CustomEvent).detail as { + start: number; + end: number; + text?: string; + }; + if (typeof diffText === 'string') { + try { + const caret = caretRef.current; + const updated = replaceRange(textRef.current, start, end, diffText, caret); + setText(updated); + textRef.current = updated; + } catch (err) { + console.warn('[v2] failed to apply diff', { start, end, diffText, err }); + } + } + }; + window.addEventListener('mindtyper:validationBand', onBand as EventListener); + window.addEventListener('mindtyper:highlight', onHighlight as EventListener); + return () => { + window.removeEventListener('mindtyper:validationBand', onBand as EventListener); + window.removeEventListener('mindtyper:highlight', onHighlight as EventListener); + }; + }, []); + + useEffect(() => { + try { + const t = localStorage.getItem('mt.v2.tickMs'); + const e = localStorage.getItem('mt.v2.errorRate'); + const j = localStorage.getItem('mt.v2.jitterMs'); + const b = localStorage.getItem('mt.v2.burstiness'); + const p = localStorage.getItem('mt.v2.pauseWeight'); + const l = localStorage.getItem('mt.v2.layout') as LayoutName | null; + if (t) setTickMs(parseInt(t, 10)); + if (e) setErrorRate(parseFloat(e)); + if (j) setJitterMs(parseInt(j, 10)); + if (b) setBurstiness(parseFloat(b)); + if (p) setPauseWeight(parseFloat(p)); + if (l === 'qwerty' || l === 'qwertz') setLayout(l); + } catch {} + }, []); + + // Utility RNG for stable distribution per step + const rng = () => Math.random(); + + 
// --- Simulation helpers, styles and stats (interior of Tester) ---

  /**
   * Delay in milliseconds to wait after typing `nextChar`.
   *
   * Starts from `tickMs`, shortens it while a burst is active, stretches it at
   * whitespace and punctuation by `pauseWeight`, then adds uniform jitter of
   * up to ±`jitterMs`. The result is never below 15 ms.
   */
  function computeDelayForChar(nextChar: string): number {
    const minDelayMs = 15;
    let base = tickMs;
    // inside a burst we accelerate typing
    if (burstLeftRef.current > 0) base = Math.max(minDelayMs, Math.floor(base * 0.6));
    // pauses at word boundaries and punctuation
    if (nextChar === ' ' || nextChar === '\n') base = Math.floor(base * (1 + pauseWeight));
    if (/[.!?,:;]/.test(nextChar)) base = Math.floor(base * (1 + pauseWeight * 1.2));
    // jitter
    if (jitterMs > 0) {
      base += Math.floor((rng() * 2 - 1) * jitterMs);
    }
    // Apply the floor on every path, not only when jitter is enabled, so a tiny
    // tick can never turn the simulation into a tight timer loop.
    return Math.max(minDelayMs, base);
  }

  /**
   * Decide what is actually "typed" for the intended character `correct`.
   *
   * With probability `errorRate` a slip is introduced: a space is skipped or
   * doubled, a letter is swapped for a neighbouring key via `pickAdjacentChar`,
   * and any other character is occasionally duplicated.
   *
   * @returns `emit` — the text appended to the buffer; `advance` — how many
   *          source characters were consumed (always 1).
   */
  function maybeNoisyEmit(correct: string): { emit: string; advance: number } {
    // With probability errorRate, introduce a realistic slip
    if (rng() < errorRate) {
      // If space, either skip or double
      if (correct === ' ') {
        if (rng() < 0.5) return { emit: '', advance: 1 }; // skip space
        // Two spaces: a single one would be indistinguishable from no error.
        return { emit: correct + correct, advance: 1 }; // double space
      }
      // Adjacent substitution for letters and common symbols
      if (/^[A-Za-z]$/.test(correct)) {
        const swapped = pickAdjacentChar(correct, layout, rng);
        return { emit: swapped, advance: 1 };
      }
      // Occasionally duplicate a character
      if (rng() < 0.2) return { emit: correct + correct, advance: 1 };
    }
    return { emit: correct, advance: 1 };
  }

  /** Replace any pending step timer with one that fires after `stepDelay` ms. */
  function schedule(stepDelay: number) {
    if (simTimeoutRef.current !== null) window.clearTimeout(simTimeoutRef.current);
    simTimeoutRef.current = window.setTimeout(runStep, stepDelay);
  }

  /**
   * Type one character of PASSAGE (possibly with a slip), push the new buffer
   * into the pipeline, and schedule the following step. Restarts from an empty
   * buffer once the passage is exhausted.
   */
  function runStep() {
    if (!autoPlay) return;
    // Loop when we reach the end
    if (srcIndexRef.current >= PASSAGE.length) {
      srcIndexRef.current = 0;
      setText('');
      textRef.current = '';
      caretRef.current = 0;
      pipeline.ingest('', 0);
      // short reset pause
      schedule(Math.max(250, tickMs * 4));
      return;
    }

    // decide if we start or continue a burst
    if (burstLeftRef.current <= 0 && rng() < burstiness) {
      // bursts of 5-15 characters
      burstLeftRef.current = 5 + Math.floor(rng() * 11);
    }

    const nextChar = PASSAGE[srcIndexRef.current];
    const noisy = maybeNoisyEmit(nextChar);

    // apply emitted string to our text buffer; the caret always sits at the end
    const nextText = textRef.current + noisy.emit;
    setText(nextText);
    textRef.current = nextText;
    caretRef.current = nextText.length;
    pipeline.ingest(nextText, caretRef.current);

    srcIndexRef.current += noisy.advance;
    if (burstLeftRef.current > 0) burstLeftRef.current -= 1;

    const delay = computeDelayForChar(nextChar);
    recordStep(nextChar, noisy.emit, delay);
    schedule(delay);
  }

  // Drive the simulation with variable delays
  useEffect(() => {
    const cancelPending = () => {
      if (simTimeoutRef.current !== null) {
        window.clearTimeout(simTimeoutRef.current);
        simTimeoutRef.current = null;
      }
    };
    if (!autoPlay) {
      cancelPending();
      return;
    }
    // kick off
    schedule(Math.max(15, tickMs));
    return cancelPending;
    // The step functions are re-created every render; re-running on the control
    // values below is what refreshes the closure used by the timer.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [autoPlay, tickMs, errorRate, jitterMs, burstiness, pauseWeight, layout]);

  // Simple glass-style helpers
  const glass: React.CSSProperties = {
    background: 'linear-gradient(to bottom right, rgba(255,255,255,0.06), rgba(255,255,255,0.03))',
    border: '1px solid rgba(255,255,255,0.08)',
    borderRadius: 16,
    boxShadow: '0 10px 30px rgba(0,0,0,0.5)',
    backdropFilter: 'blur(12px)',
    WebkitBackdropFilter: 'blur(12px)',
  };
  const headerStyle: React.CSSProperties = {
    fontSize: 'clamp(1.2rem, 2.4vw, 2rem)',
    fontWeight: 800,
    letterSpacing: 0.5,
    margin: 0,
  };
  const subStyle: React.CSSProperties = {
    marginTop: 6,
    opacity: 0.8,
    lineHeight: 1.4,
  };
  const grid: React.CSSProperties = {
    display: 'grid',
    gap: 16,
    gridTemplateColumns: 'repeat(auto-fit, minmax(220px, 1fr))',
    alignItems: 'center',
  };
  const ctrlLabel: React.CSSProperties = {
    display: 'flex',
    flexDirection: 'column',
    gap: 8,
    fontSize: '1rem',
    fontWeight: 600,
  };
  const rangeStyle: React.CSSProperties = { width: '100%' };
  const numberStyle: React.CSSProperties = { width: 96, fontSize: '1rem', padding: '6px 10px' };

  // Stats (observed)
  const statsRef = useRef({
    steps: 0,
    inserts: 0,
    substitutes: 0,
    duplicates: 0,
    skippedSpaces: 0,
    lastDelayMs: 0,
    avgDelayMs: 0,
  });
  // Counter state used only to force a re-render when the stats ref changes.
  const [, forceStatsTick] = useState(0);
  const healthRef = useRef({ monitor: 0, scheduler: 0, diffusion: 0, lastBandAt: 0, lastHighlightAt: 0 });

  /**
   * Fold one simulation step into the running stats.
   *
   * @param nextChar  the character that should have been typed
   * @param emitted   the text that was actually appended
   * @param usedDelay the delay (ms) chosen for this step
   */
  function recordStep(nextChar: string, emitted: string, usedDelay: number) {
    const s = statsRef.current;
    s.steps += 1;
    s.lastDelayMs = usedDelay;
    // Exponential moving average, seeded with the first observed delay.
    s.avgDelayMs = s.avgDelayMs === 0 ? usedDelay : Math.round(s.avgDelayMs * 0.9 + usedDelay * 0.1);
    if (emitted.length > 1) {
      if (emitted === nextChar + nextChar) s.duplicates += 1;
      else s.inserts += 1;
    } else if (emitted.length === 0) {
      s.skippedSpaces += 1;
    } else if (emitted !== nextChar) {
      s.substitutes += 1;
    }
    // trigger UI update at low rate
    if (s.steps % 5 === 0) forceStatsTick((x) => x + 1);
  }

  const pagePad = 'clamp(8px, 2vw, 16px)';
  const panelPad = 'clamp(8px, 2vw, 16px)';

  return (
+
+

Noisy Typing Tester

+

+ Precise, adjustable simulation of human typing with bursts, pauses, and keyboard slips. The engine cleans up behind you like manual typing. Tune parameters and watch the effect. +

+
+ +
+
+
+ + + + + + + + + + + +
+
+