diff --git a/CHANGELOG.md b/CHANGELOG.md index 19394cb..c2cc7c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added +- **`WorkerDocxodus.prepare()` — comparison-path warmup** (consumer issue JSv4/crowdsourced-redlines-js#2). `createWorkerDocxodus()` warms the .NET WASM runtime but does **not** load the comparison assemblies — the runtime defers `Docxodus.*.wasm` and its `System.*.wasm` dependents until the first real comparison, so the first `compareDocuments()` paid an extra ~3s of pure assembly-load latency. The new `prepare(): Promise<void>` pays that cost up front: it runs a complete comparison inside the worker against two tiny seed documents constructed in-memory on the .NET side (no caller IO, no seed fixtures to ship), forcing every assembly the engine touches to resolve. After `await prepare()`, the next `compareDocuments()` / `compareDocumentsToHtml()` triggers no further `.wasm` fetches. The method is idempotent (repeated/concurrent calls share one in-flight warmup and resolve immediately once complete) and concurrent-safe (a `compareDocuments()` issued while a `prepare()` is in flight does not double-load assemblies). Implemented as a new `Warmup()` `[JSExport]` on `DocumentComparer`, a `"prepare"` worker message, and the `WorkerDocxodus.prepare()` proxy method. Tests: `npm/tests/worker-prepare.spec.ts` (verifies via page-level `.wasm` request monitoring that prepare loads `Docxodus.wasm`, the following compare loads none, idempotency resolves <50ms, and concurrent prepare+compare never double-loads). 
+ ## [6.1.0] - 2026-05-28 ### Changed diff --git a/npm/README.md b/npm/README.md index 2205538..89e7650 100644 --- a/npm/README.md +++ b/npm/README.md @@ -300,6 +300,36 @@ const metadata = await docxodus.getDocumentMetadata(docxFile); docxodus.terminate(); ``` +#### First-call warmup + +`createWorkerDocxodus()` warms the .NET WASM runtime, but the **comparison code +path is not exercised until your first `compareDocuments()`**. That first call +pays a one-time warmup cost (comparison-assembly initialization + JIT of the +diff/XML engine) — roughly **2× the latency** of every subsequent compare. + +`prepare()` is an **optional** method that pays this cost up front. Call it once +after creating the worker — during app boot, or while the user is still picking +files — so the first user-triggered comparison is already hot. It does **not** +run automatically; if you skip it, the first compare simply absorbs the warmup +as before. + +```typescript +const docxodus = await createWorkerDocxodus({ wasmBasePath: '/wasm/' }); + +// Optional: warm the comparison path ahead of the first user action. +await docxodus.prepare(); + +// Now hot — the first real compare runs at steady-state speed and triggers +// no further .wasm fetches. +const redlined = await docxodus.compareDocuments(original, modified); +``` + +`prepare()` is idempotent (repeated calls share one in-flight warmup and resolve +immediately once complete), needs no input documents or seed files of your own +(it builds tiny seed documents inside the worker), and is concurrent-safe — +issuing a `compareDocuments()` while a `prepare()` is still in flight will not +double-load assemblies. 
+ ### React Hooks #### `useDocxodus(wasmBasePath?: string)` diff --git a/npm/src/docxodus.worker.ts b/npm/src/docxodus.worker.ts index 1f145a2..22b1c86 100644 --- a/npm/src/docxodus.worker.ts +++ b/npm/src/docxodus.worker.ts @@ -510,6 +510,23 @@ function handleSessionMoveAnnotation( } } +/** + * Handle prepare request — warm the comparison code path so the next + * compareDocuments triggers no further WASM assembly fetches. + */ +function handlePrepare(): { error?: string } { + const exports = ensureInitialized(); + try { + const result = exports.DocumentComparer.Warmup(); + if (isErrorResponse(result)) { + return parseError(result); + } + return {}; + } catch (error) { + return { error: String(error) }; + } +} + /** * Handle getVersion request. */ @@ -647,6 +664,17 @@ self.onmessage = async (event: MessageEvent) => { break; } + case "prepare": { + const result = handlePrepare(); + response = { + id: request.id, + type: "prepare", + success: !result.error, + error: result.error, + }; + break; + } + case "sessionOpen": { const sessionOpenRequest = request as WorkerSessionOpenRequest; const result = handleSessionOpen(sessionOpenRequest); diff --git a/npm/src/types.ts b/npm/src/types.ts index 88bbb2c..83bb16c 100644 --- a/npm/src/types.ts +++ b/npm/src/types.ts @@ -668,6 +668,12 @@ export interface DocxodusWasmExports { ) => string; }; DocumentComparer: { + /** + * Force the comparison code path hot by running a real comparison against + * tiny in-memory seed documents. Returns "ok" or a JSON error object. + * Idempotent — assemblies load only once. 
+ */ + Warmup: () => string; CompareDocuments: ( originalBytes: Uint8Array, modifiedBytes: Uint8Array, @@ -2009,6 +2015,7 @@ export type WorkerRequestType = | "getRevisions" | "getDocumentMetadata" | "getVersion" + | "prepare" | "sessionOpen" | "sessionClose" | "sessionAddAnnotation" @@ -2099,6 +2106,14 @@ export interface WorkerGetVersionRequest extends WorkerRequestBase { type: "getVersion"; } +/** + * Warm up the comparison code path so the next compare triggers no further + * WASM assembly fetches. Carries no payload. + */ +export interface WorkerPrepareRequest extends WorkerRequestBase { + type: "prepare"; +} + /** * Open a DocxSession in the worker. */ @@ -2173,6 +2188,7 @@ export type WorkerRequest = | WorkerGetRevisionsRequest | WorkerGetDocumentMetadataRequest | WorkerGetVersionRequest + | WorkerPrepareRequest | WorkerSessionOpenRequest | WorkerSessionCloseRequest | WorkerSessionAddAnnotationRequest @@ -2253,6 +2269,13 @@ export interface WorkerGetVersionResponse extends WorkerResponseBase { version?: VersionInfo; } +/** + * Response from prepare request. Carries no payload beyond success/error. + */ +export interface WorkerPrepareResponse extends WorkerResponseBase { + type: "prepare"; +} + /** * Response from sessionOpen request. 
*/ @@ -2294,6 +2317,7 @@ export type WorkerResponse = | WorkerGetRevisionsResponse | WorkerGetDocumentMetadataResponse | WorkerGetVersionResponse + | WorkerPrepareResponse | WorkerSessionOpenResponse | WorkerSessionCloseResponse | WorkerSessionEditResponse; diff --git a/npm/src/worker-proxy.ts b/npm/src/worker-proxy.ts index f6fe794..f00fbdd 100644 --- a/npm/src/worker-proxy.ts +++ b/npm/src/worker-proxy.ts @@ -28,6 +28,7 @@ import type { WorkerGetRevisionsResponse, WorkerGetDocumentMetadataResponse, WorkerGetVersionResponse, + WorkerPrepareResponse, WorkerSessionOpenResponse, WorkerSessionEditResponse, WorkerDocxodusOptions, @@ -210,6 +211,30 @@ export interface WorkerDocxodus { */ getVersion(): Promise<VersionInfo>; + /** + * Pre-warm the comparison code path. + * + * The 10s runtime warmup paid by {@link createWorkerDocxodus} does not load + * the comparison assemblies — the .NET WASM runtime defers + * `Docxodus.*.wasm` and its `System.*.wasm` dependents until the first + * {@link compareDocuments} call, which then costs ~3s of pure assembly-load + * latency. Call `prepare()` after creating the worker to pay that cost ahead + * of any user action; once it resolves, the next {@link compareDocuments} + * (or {@link compareDocumentsToHtml}) triggers no further `.wasm` fetches. + * + * Semantics: + * - **Idempotent.** Repeated calls share one in-flight warmup and resolve + * immediately once it has completed. + * - **No caller IO.** No seed files to fetch, no inputs to construct — the + * seed documents are built inside the worker. + * - **Concurrent-safe.** `prepare()` and `compareDocuments()` may be called + * in any order; a `compareDocuments()` issued while a `prepare()` is in + * flight does not double-load assemblies. + * + * @returns A Promise that resolves when the comparison path is fully hot. + */ + prepare(): Promise<void>; + /** * Open a {@link WorkerDocxSession} for surgical annotation editing inside * the worker. 
The document bytes are transferred to the worker (zero-copy). @@ -289,6 +314,11 @@ export async function createWorkerDocxodus( // Track if worker is active let isWorkerActive = true; + // Cached warmup promise. Set on the first prepare() and reused thereafter so + // repeated/concurrent calls share a single in-flight (or completed) warmup. + // Reset to null on failure so a later prepare() can retry. + let preparePromise: Promise<void> | null = null; + // Handle worker messages worker.onmessage = (event: MessageEvent) => { const response = event.data; @@ -455,6 +485,22 @@ export async function createWorkerDocxodus( return response.version!; }, + prepare(): Promise<void> { + // Idempotent: hand back the existing warmup if one is in flight or done. + if (preparePromise) { + return preparePromise; + } + preparePromise = sendRequest<WorkerPrepareResponse>({ + id: generateId(), + type: "prepare", + }).then(() => undefined); + // On failure, clear the cache so a subsequent prepare() can retry. + preparePromise.catch(() => { + preparePromise = null; + }); + return preparePromise; + }, + async openDocxSession( document: File | Uint8Array, settings?: DocxSessionSettings diff --git a/npm/tests/worker-prepare.spec.ts b/npm/tests/worker-prepare.spec.ts new file mode 100644 index 0000000..49be7f2 --- /dev/null +++ b/npm/tests/worker-prepare.spec.ts @@ -0,0 +1,228 @@ +/** + * Tests for WorkerDocxodus.prepare() — pre-warming the comparison code path. + * + * prepare() runs a real comparison against in-memory seed documents inside the + * worker, forcing the .NET WASM runtime to fully resolve and JIT the comparison + * code path. After it resolves, the first real compareDocuments() (a) triggers + * no further .wasm fetches and (b) runs meaningfully faster than a cold one. + * + * We monitor network requests for ".wasm" at the page level (Playwright + * surfaces dedicated-worker requests on the owning page) and time compares + * inside the worker via performance.now(). 
+ */ + +import { test, expect, type Request } from "@playwright/test"; +import * as fs from "fs"; +import * as path from "path"; +import { fileURLToPath } from "url"; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const testFilesDir = path.join(__dirname, "../../TestFiles"); + +function readTestFile(relativePath: string): Uint8Array { + return new Uint8Array(fs.readFileSync(path.join(testFilesDir, relativePath))); +} + +/** Last path segment of a .wasm URL, e.g. ".../Docxodus.wasm" -> "Docxodus.wasm". */ +function wasmName(url: string): string { + return url.split("/").pop() ?? url; +} + +test.describe("WorkerDocxodus.prepare()", () => { + let wasmRequests: string[]; + + test.beforeEach(async ({ page }) => { + wasmRequests = []; + page.on("request", (req: Request) => { + const url = req.url(); + if (url.endsWith(".wasm")) { + wasmRequests.push(url); + } + }); + + await page.goto("/worker-test-harness.html"); + await page.waitForFunction( + () => (window as any).DocxodusWorkerTests !== undefined, + { timeout: 10000 } + ); + }); + + test("after prepare(), a real compare triggers no additional .wasm fetches", async ({ + page, + }) => { + // Bring up the worker and warm the comparison path. + await page.evaluate(() => (window as any).createDocxodusWorker()); + const prepareResult = await page.evaluate(() => + (window as any).DocxodusWorkerTests.prepare() + ); + // Let any in-flight 'request' events drain before snapshotting. + await page.waitForTimeout(300); + + expect(prepareResult.error).toBeUndefined(); + expect(prepareResult.ok).toBe(true); + // The monitor must actually observe the worker's .wasm requests, otherwise + // the "no new fetches" assertion below would be a meaningless 0 === 0. + expect(wasmRequests.length).toBeGreaterThan(0); + + const beforeCompare = [...wasmRequests]; + + // A real comparison after prepare() must trigger NO new .wasm fetches — + // everything the engine needs is already loaded. 
+ const originalBytes = readTestFile("WC/WC001-Digits.docx"); + const modifiedBytes = readTestFile("WC/WC001-Digits-Mod.docx"); + + const compareResult = await page.evaluate( + async ([original, modified]) => + (window as any).DocxodusWorkerTests.compareDocuments(original, modified), + [Array.from(originalBytes), Array.from(modifiedBytes)] + ); + await page.waitForTimeout(300); + + expect(compareResult.error).toBeUndefined(); + expect(compareResult.docxBytes.length).toBeGreaterThan(0); + + const loadedByCompare = wasmRequests + .filter((u) => !beforeCompare.includes(u)) + .map(wasmName); + + console.log( + `Assemblies loaded by compareDocuments() after prepare(): ${ + loadedByCompare.length === 0 ? "(none)" : loadedByCompare.join(", ") + }` + ); + + expect(loadedByCompare).toEqual([]); + }); + + test("prepare() makes the first real compare meaningfully faster", async ({ + page, + }) => { + const originalBytes = readTestFile("WC/WC001-Digits.docx"); + const modifiedBytes = readTestFile("WC/WC001-Digits-Mod.docx"); + const original = Array.from(originalBytes); + const modified = Array.from(modifiedBytes); + + // Throwaway worker to absorb one-time page-level costs (module eval, first + // dynamic import of the .NET runtime), so the cold/warm gap reflects + // prepare() rather than page warmup. + await page.evaluate(async () => { + await (window as any).createDocxodusWorker(); + (window as any).DocxodusWorker.terminate(); + }); + + // COLD: fresh worker, no prepare(), time the first compare. + const coldFirstMs = await page.evaluate( + async ([o, m]) => { + await (window as any).createDocxodusWorker(); + const t = performance.now(); + await (window as any).DocxodusWorker.compareDocuments( + new Uint8Array(o), + new Uint8Array(m) + ); + const elapsed = performance.now() - t; + (window as any).DocxodusWorker.terminate(); + return elapsed; + }, + [original, modified] + ); + + // WARM: fresh worker, prepare() first, then time the first compare. 
+ const warm = await page.evaluate( + async ([o, m]) => { + await (window as any).createDocxodusWorker(); + const tp = performance.now(); + await (window as any).DocxodusWorker.prepare(); + const prepMs = performance.now() - tp; + const t = performance.now(); + await (window as any).DocxodusWorker.compareDocuments( + new Uint8Array(o), + new Uint8Array(m) + ); + const firstMs = performance.now() - t; + return { prepMs, firstMs }; + }, + [original, modified] + ); + + console.log( + `cold first compare=${coldFirstMs.toFixed(0)}ms | ` + + `prepare=${warm.prepMs.toFixed(0)}ms, warm first compare=${warm.firstMs.toFixed(0)}ms` + ); + + // prepare() must do real warmup work (not a no-op). + expect(warm.prepMs).toBeGreaterThan(50); + // The whole point: a warmed first compare is substantially cheaper than a + // cold one. Observed ~1.9x; assert a conservative 15% improvement. + expect(warm.firstMs).toBeLessThan(coldFirstMs * 0.85); + }); + + test("prepare() is idempotent — second call resolves in <50ms", async ({ + page, + }) => { + await page.evaluate(() => (window as any).createDocxodusWorker()); + + const first = await page.evaluate(() => + (window as any).DocxodusWorkerTests.prepare() + ); + expect(first.error).toBeUndefined(); + expect(first.ok).toBe(true); + + const second = await page.evaluate(() => + (window as any).DocxodusWorkerTests.prepare() + ); + expect(second.error).toBeUndefined(); + expect(second.ok).toBe(true); + + console.log( + `prepare() first=${first.durationMs.toFixed(1)}ms, second=${second.durationMs.toFixed(1)}ms` + ); + + expect(second.durationMs).toBeLessThan(50); + + // Idempotent second call must not re-fetch any assembly. 
+ await page.waitForTimeout(200); + const unique = new Set(wasmRequests); + expect(unique.size).toBe(wasmRequests.length); + }); + + test("compareDocuments() while prepare() is in flight does not double-load assemblies", async ({ + page, + }) => { + await page.evaluate(() => (window as any).createDocxodusWorker()); + await page.waitForTimeout(200); + + const originalBytes = readTestFile("WC/WC001-Digits.docx"); + const modifiedBytes = readTestFile("WC/WC001-Digits-Mod.docx"); + + // Fire prepare() and compareDocuments() concurrently — do NOT await prepare + // before issuing the compare. + const result = await page.evaluate( + async ([original, modified]) => { + const worker = (window as any).DocxodusWorker; + const preparePromise = worker.prepare(); + const comparePromise = worker.compareDocuments( + new Uint8Array(original), + new Uint8Array(modified) + ); + const [, docxBytes] = await Promise.all([preparePromise, comparePromise]); + return { docxLength: docxBytes.length }; + }, + [Array.from(originalBytes), Array.from(modifiedBytes)] + ); + + await page.waitForTimeout(300); + + expect(result.docxLength).toBeGreaterThan(0); + + // No .wasm URL may be fetched more than once — the runtime de-duplicates + // concurrent loads, so there is no double-load even under contention. + const duplicates = wasmRequests.filter( + (u, i) => wasmRequests.indexOf(u) !== i + ); + console.log( + `Total .wasm requests: ${wasmRequests.length}, unique: ${new Set(wasmRequests).size}` + ); + expect(duplicates.map(wasmName)).toEqual([]); + }); +}); diff --git a/npm/tests/worker-test-harness.html b/npm/tests/worker-test-harness.html index 4fd35ac..b3c4ce8 100644 --- a/npm/tests/worker-test-harness.html +++ b/npm/tests/worker-test-harness.html @@ -116,6 +116,20 @@

Docxodus Worker Test Harness

} }, + // Pre-warm the comparison code path; returns elapsed ms. + prepare: async function() { + if (!window.DocxodusWorker) { + return { error: { message: 'Worker not initialized' } }; + } + try { + const start = performance.now(); + await window.DocxodusWorker.prepare(); + return { ok: true, durationMs: performance.now() - start }; + } catch (error) { + return { error: { message: error.message } }; + } + }, + // Get document metadata using worker (for lazy loading) getDocumentMetadata: async function(bytes) { if (!window.DocxodusWorker) { diff --git a/wasm/DocxodusWasm/DocumentComparer.cs b/wasm/DocxodusWasm/DocumentComparer.cs index 6107dc7..43839d5 100644 --- a/wasm/DocxodusWasm/DocumentComparer.cs +++ b/wasm/DocxodusWasm/DocumentComparer.cs @@ -2,7 +2,9 @@ using System.Runtime.Versioning; using System.Text.Json; using Docxodus; +using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; namespace DocxodusWasm; @@ -13,6 +15,89 @@ namespace DocxodusWasm; [SupportedOSPlatform("browser")] public partial class DocumentComparer { + /// + /// Force the comparison code path fully hot. + /// + /// Creating the WASM runtime does not exercise the comparison engine, so + /// the first real <see cref="CompareDocuments"/> pays a one-time warmup + /// cost — comparison-assembly initialization plus JIT of the diff/XML + /// stack — on top of the actual diff work (~2x the steady-state latency). + /// + /// This method runs a complete comparison against two tiny seed + /// documents constructed in-memory, exercising the exact code path + /// <see cref="CompareDocuments"/> uses (<see cref="WmlComparer.Compare"/>). + /// That resolves and JIT-compiles everything the engine touches, so a + /// subsequent real comparison runs at steady-state speed and triggers no + /// further .wasm fetches. + /// + /// Idempotent and self-contained: no caller IO, no seed fixtures to + /// ship. Safe to call repeatedly — the warmup work is only paid once. 
+ /// Returns "ok" on success or a JSON error object; warmup is + /// best-effort, so a failure inside the comparer may still leave it warm. + /// + /// "ok" on success, or a JSON error object. + [JSExport] + public static string Warmup() + { + try + { + // Two minimal in-memory documents that differ by a single word, so + // the comparer produces a real insertion/deletion and walks the + // full LCS + markup path rather than an empty fast-exit. + var original = new WmlDocument("warmup-original.docx", BuildSeedDocx("warmup original")); + var modified = new WmlDocument("warmup-modified.docx", BuildSeedDocx("warmup modified")); + + var settings = new WmlComparerSettings + { + AuthorForRevisions = "Docxodus", + DateTimeForRevisions = DateTime.UtcNow.ToString("o"), + DetailThreshold = 0.15 + }; + + var result = WmlComparer.Compare(original, modified, settings); + + // Touch the revision-extraction path too, since callers that warm + // the compare path almost always read revisions next. + _ = WmlComparer.GetRevisions(result, settings); + + return "ok"; + } + catch (Exception ex) + { + // If WmlComparer.Compare was reached, its assemblies are presumably + // loaded even though the comparison threw; a failure while building + // the seed documents happens earlier and leaves the path cold. + // Report the failure so a caller can surface it, but do not throw. + return DocumentConverter.SerializeError(ex.Message, ex.GetType().Name); + } + } + + /// + /// Build a minimal but valid DOCX package (one paragraph) in memory. + /// Includes the parts comparison expects (styles, settings). 
+ /// + private static byte[] BuildSeedDocx(string text) + { + using var ms = new MemoryStream(); + using (var doc = WordprocessingDocument.Create(ms, WordprocessingDocumentType.Document)) + { + var mainPart = doc.AddMainDocumentPart(); + mainPart.Document = new Document(new Body( + new Paragraph( + new Run( + new Text(text) { Space = SpaceProcessingModeValues.Preserve })))); + + var stylesPart = mainPart.AddNewPart<StyleDefinitionsPart>(); + stylesPart.Styles = new Styles(); + + var settingsPart = mainPart.AddNewPart<DocumentSettingsPart>(); + settingsPart.Settings = new Settings(); + + mainPart.Document.Save(); + } + return ms.ToArray(); + } + /// /// Compare two DOCX documents and return the result as a redlined DOCX (byte array). ///