From dcdc60101bb482b0e5f0f58b4f856e2a4124e928 Mon Sep 17 00:00:00 2001 From: ruvnet Date: Thu, 7 May 2026 15:43:06 -0400 Subject: [PATCH] fix(ruvector): bundle ONNX runtime into dist/ on build (#354) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The published tarball was missing every ONNX runtime file except a 1-line `package.json`, so `OptimizedOnnxEmbedder` (and any code path that calls `initOnnxEmbedder()`) crashed on every clean install with: Error: ONNX WASM files not bundled. The onnx/ directory is missing. Root cause is the build script: "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/" `tsc` only emits compiled `.ts` output (no `allowJs`). The wasm-bindgen artifacts under `src/core/onnx/pkg/` (the .wasm payload, _bg.js, type defs, LICENSE) and the sibling `src/core/onnx/loader.js` are runtime JavaScript — `tsc` doesn't relay them — but the script only copied a single `package.json`. Everything else stayed in `src/` and never made it into the tarball. Fix: - Replace the single-file copy with `scripts/copy-onnx-assets.js`, a Node-portable recursive copy (works on Windows; doesn't need cp). - Skip dotfiles (e.g. transient `.claude-flow/` agent metadata) and `node_modules/` so they don't leak into the published artifact. - Sanity-check that the canonical runtime files (`*_bg.{js,wasm}`, `*.js`, `loader.js`) landed where `onnx-embedder.js` looks for them; fail the build loudly if not. Verified end-to-end against ruvector@0.2.25 on Node 22.22.2: $ rm -rf dist/core/onnx && npm run build > tsc && node scripts/copy-onnx-assets.js copy-onnx-assets: 10 ONNX runtime file(s) staged under dist/. 
$ ls dist/core/onnx/pkg/ LICENSE ruvector_onnx_embeddings_wasm_bg.js loader.js ruvector_onnx_embeddings_wasm_bg.wasm package.json ruvector_onnx_embeddings_wasm_bg.wasm.d.ts ruvector_onnx_embeddings_wasm.d.ts ruvector_onnx_embeddings_wasm.js $ npm pack && tar -tzf ruvector-0.2.25.tgz | grep -c onnx/pkg 8 # Clean install into /tmp: $ node -e "const {isOnnxAvailable} = require('ruvector/dist/core/onnx-embedder'); console.log(isOnnxAvailable())" true Closes #354 Co-Authored-By: claude-flow --- npm/packages/ruvector/package.json | 2 +- .../ruvector/scripts/copy-onnx-assets.js | 79 +++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 npm/packages/ruvector/scripts/copy-onnx-assets.js diff --git a/npm/packages/ruvector/package.json b/npm/packages/ruvector/package.json index 7af805604..758ddc231 100644 --- a/npm/packages/ruvector/package.json +++ b/npm/packages/ruvector/package.json @@ -8,7 +8,7 @@ "ruvector": "./bin/cli.js" }, "scripts": { - "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/", + "build": "tsc && node scripts/copy-onnx-assets.js", "verify-dist": "node scripts/verify-dist.js", "prepublishOnly": "npm run build && npm run verify-dist", "test": "node test/integration.js && node test/cli-commands.js" diff --git a/npm/packages/ruvector/scripts/copy-onnx-assets.js b/npm/packages/ruvector/scripts/copy-onnx-assets.js new file mode 100644 index 000000000..ac29f3e0c --- /dev/null +++ b/npm/packages/ruvector/scripts/copy-onnx-assets.js @@ -0,0 +1,79 @@ +#!/usr/bin/env node +/** + * copy-onnx-assets.js — copy non-TypeScript ONNX runtime files into `dist/`. + * + * Why: `tsconfig.json` does not set `allowJs`, so `tsc` only emits compiled + * `.ts` output. 
The wasm-bindgen artifacts under `src/core/onnx/pkg/`
+ * (`*.wasm`, `*_bg.js`, `*.d.ts`, `package.json`, `LICENSE`) and the
+ * sibling `src/core/onnx/loader.js` are required at runtime by
+ * `dist/core/onnx-embedder.js` but were not being copied — published
+ * tarballs were missing the WASM payload entirely (#354), making
+ * `OptimizedOnnxEmbedder` unloadable on every clean install.
+ *
+ * Implemented as a Node script (no `cp -r`) so the build runs unchanged
+ * on Windows.
+ */
+
+const fs = require('fs');
+const path = require('path');
+
+const pkgRoot = path.resolve(__dirname, '..');
+
+/**
+ * Recursively copy `src` to `dst`, creating missing directories. Directory
+ * entries named `node_modules` or starting with `.` are skipped.
+ * @param {string} src - File or directory to copy.
+ * @param {string} dst - Path that `src` is copied to.
+ * @returns {number} Number of files actually copied.
+ */
+function copyRecursive(src, dst) {
+  if (!fs.statSync(src).isDirectory()) {
+    fs.mkdirSync(path.dirname(dst), { recursive: true });
+    fs.copyFileSync(src, dst);
+    return 1;
+  }
+  fs.mkdirSync(dst, { recursive: true });
+  let copied = 0;
+  for (const entry of fs.readdirSync(src)) {
+    // Skip dotfiles (e.g. transient `.claude-flow/` agent metadata)
+    // and node_modules — neither belongs in the published artifact.
+    if (entry.startsWith('.') || entry === 'node_modules') continue;
+    copied += copyRecursive(path.join(src, entry), path.join(dst, entry));
+  }
+  return copied;
+}
+
+// (sourceRel, destinationRel) — both relative to the package root.
+const assets = [
+  ['src/core/onnx/loader.js', 'dist/core/onnx/loader.js'],
+  ['src/core/onnx/pkg', 'dist/core/onnx/pkg'],
+];
+
+// Sum what copyRecursive really copied; re-walking the source tree would
+// also count the dotfiles and node_modules entries that the copy skips.
+let fileCount = 0;
+for (const [srcRel, dstRel] of assets) {
+  const src = path.join(pkgRoot, srcRel);
+  const dst = path.join(pkgRoot, dstRel);
+  if (!fs.existsSync(src)) {
+    console.error(`copy-onnx-assets: missing source ${srcRel}`);
+    process.exit(1);
+  }
+  fileCount += copyRecursive(src, dst);
+}
+
+// Sanity check the runtime payload landed where the embedder expects it.
+const requiredFiles = [
+  'dist/core/onnx/loader.js',
+  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js',
+  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm',
+  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js',
+];
+const absent = [];
+for (const rel of requiredFiles) {
+  if (!fs.existsSync(path.join(pkgRoot, rel))) absent.push(rel);
+}
+
+// Exit non-zero, listing every absent file, so the build step fails.
+if (absent.length !== 0) {
+  console.error('copy-onnx-assets: required files missing after copy:');
+  absent.forEach((rel) => console.error(` - ${rel}`));
+  process.exit(1);
+}
+
+console.log(`copy-onnx-assets: ${fileCount} ONNX runtime file(s) staged under dist/.`);