diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 38a81f13b6..a58e4d845f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -388,6 +388,43 @@ jobs: exit 1 fi + gfql-pyodide-browser: + needs: changes + if: ${{ needs.changes.outputs.docs == 'true' || needs.changes.outputs.gfql == 'true' || needs.changes.outputs.infra == 'true' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Set up Node.js 20 + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install uv + run: python -m pip install "uv==0.11.3" + + - name: Install browser test dependencies + run: npm install --prefix demos/gfql/pyodide --no-audit --no-fund + + - name: Install Chromium + run: npm exec --prefix demos/gfql/pyodide -- playwright install --with-deps chromium + + - name: Build GFQL Pyodide CDN bundle + run: node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-browser --flavor cdn + + - name: Browser smoke + env: + GFQL_BROWSER_SCREENSHOT: /tmp/gfql-pyodide-browser.png + run: node /tmp/pygraphistry-gfql-pyodide-browser/test-browser.mjs /tmp/pygraphistry-gfql-pyodide-browser + check-spark-lockfile: needs: changes if: ${{ needs.changes.outputs.spark == 'true' || needs.changes.outputs.infra == 'true' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} diff --git a/.readthedocs.yml b/.readthedocs.yml index caa5cc10bf..4ce955dc4c 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,6 +9,7 @@ build: os: ubuntu-22.04 tools: python: "3.12" + nodejs: "20" apt_packages: # System dependencies - now works because we use jobs instead of commands # More closely mirror https://github.com/sphinx-doc/sphinx-docker-images @@ -42,6 +43,7 @@ build: - cp 
DEVELOP.md docs/source/DEVELOP.md build: html: + - node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn - sphinx-build -b html -d docs/doctrees docs/source $READTHEDOCS_OUTPUT/html/ epub: - sphinx-build -b epub -d docs/doctrees docs/source docs/_build/epub diff --git a/demos/gfql/pyodide/README.md b/demos/gfql/pyodide/README.md new file mode 100644 index 0000000000..2278b68e89 --- /dev/null +++ b/demos/gfql/pyodide/README.md @@ -0,0 +1,90 @@ +# Pyodide GFQL proof + +This is a small proof of concept, built around `gfql.js`, for running PyGraphistry GFQL inside Pyodide. + +It uses: + +- Pyodide `pandas`, `requests`, `packaging`, and `typing-extensions` packages. +- `micropip` for the pure Python `lark` runtime dependency used by the Cypher parser. +- A pure Python wheel for this repo, installed into Pyodide with `deps=False` after the runtime deps are already present. + +When the Graphistry wheel is passed as a URL string (the browser case), `gfql.js` installs it with `micropip.install(url, deps=False)`. When it is passed as bytes (the Node case), `gfql.js` writes the wheel into the Pyodide FS and extracts it into `purelib` instead, because Pyodide/Node `fetch` does not resolve Pyodide FS paths as URLs. + +Build a wheel from a writable copy of the repo: + +```bash +rm -rf /tmp/pygraphistry-pyodide-src /tmp/pygraphistry-pyodide-dist +rsync -a --exclude .git --exclude plans --exclude uv.lock --exclude '=2' ./ /tmp/pygraphistry-pyodide-src/ +uv run --no-project --with build python -m build --wheel --outdir /tmp/pygraphistry-pyodide-dist /tmp/pygraphistry-pyodide-src +``` + +Run the Node smoke proof (unless `GFQL_REQUIREMENT_WHEELS` provides a colon-separated list of local requirement wheels, `lark` is installed by `micropip` over the network): + +```bash +rm -rf /tmp/pygraphistry-pyodide-node +npm install --prefix /tmp/pygraphistry-pyodide-node pyodide@314.0.0 +PYODIDE_MODULE=/tmp/pygraphistry-pyodide-node/node_modules/pyodide/pyodide.mjs node demos/gfql/pyodide/run-node.mjs /tmp/pygraphistry-pyodide-dist/graphistry-0+unknown-py3-none-any.whl +``` + +The smoke uses `edges.csv` and validates both: + +- AST GFQL: `e(edge_match={"weight": ge(2)})`, returning two filtered edges. 
+- Cypher parser path: `MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e`, returning two projected rows. + +Both paths bind a small `id` nodes table derived from the CSV endpoints before running GFQL. That avoids pandas 3.0.2 concat edge cases in the current Pyodide runtime when Graphistry has to synthesize nodes. + +Build a static Pyodide 314 bundle (the flavor defaults to `self-hosted`, or to `GFQL_PYODIDE_BUNDLE_FLAVOR` when set; the output directory is deleted and rebuilt on every run): + +```bash +node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-bundle +``` + +The builder supports two flavors: + +- `self-hosted`: copies Pyodide, Python stdlib, and required Pyodide wheels into + the bundle. This is the most reproducible/offline option and is about 22 MiB. +- `cdn`: keeps only the demo files plus Graphistry/`lark` wheels, and loads the + pinned Pyodide 314 runtime/packages from jsDelivr. This is the smallest hosted + artifact and is about 1 MiB, but first cold load still downloads Pyodide and + pandas from the CDN. + +```bash +node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn +node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-self-hosted --flavor self-hosted +``` + +To generate the Read the Docs "Try it live" payload before a Sphinx HTML build: + +```bash +node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn +``` + +That writes the bundle to `docs/source/static/gfql/pyodide/`, which is ignored +by git because it contains generated docs artifacts and local wheels. + +The bundle includes `gfql.js`, `browser.html`, `edges.csv`, `manifest.json`, +`size-report.json`, `README.txt`, `package.json`, the `run-node.mjs`, `test-browser.mjs`, and `benchmark-node.mjs` scripts, and wheels under `wheels/`. The `self-hosted` flavor also +includes `pyodide/`. Serve it with: + +```bash +cd /tmp/pygraphistry-gfql-pyodide-bundle +python -m http.server 8000 +``` + +Then open `http://localhost:8000/browser.html`. 
+ +Run the browser smoke: + +```bash +npm install --prefix demos/gfql/pyodide --no-audit --no-fund +npm exec --prefix demos/gfql/pyodide -- playwright install chromium +node demos/gfql/pyodide/test-browser.mjs /tmp/pygraphistry-gfql-pyodide-bundle +``` + +Benchmark it (this needs a local Pyodide module: use a `self-hosted` bundle, or set `PYODIDE_MODULE` to a local `pyodide.mjs`): + +```bash +GFQL_BENCH_SIZES=10,1000,10000 GFQL_BENCH_REPEAT=3 \ + node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \ + /tmp/pygraphistry-gfql-pyodide-bundle +``` diff --git a/demos/gfql/pyodide/benchmark-node.mjs b/demos/gfql/pyodide/benchmark-node.mjs new file mode 100644 index 0000000000..69160d92e6 --- /dev/null +++ b/demos/gfql/pyodide/benchmark-node.mjs @@ -0,0 +1,134 @@ +import { readFile } from "node:fs/promises"; +import { join, resolve } from "node:path"; +import { performance } from "node:perf_hooks"; +import { createGFQLRuntime } from "./gfql.js"; + +const bundleDir = resolve(process.argv[2] || "/tmp/pygraphistry-gfql-pyodide-bundle"); +const sizes = (process.env.GFQL_BENCH_SIZES || "10,1000,10000") + .split(",") + .map((value) => Number(value.trim())) + .filter((value) => Number.isFinite(value) && value > 0); +const repeat = Number(process.env.GFQL_BENCH_REPEAT || "3"); + +function generateCsv(edgeCount) { + const lines = ["src,dst,weight"]; + for (let i = 0; i < edgeCount; i += 1) { + lines.push(`n${i},n${i + 1},${i % 5}`); + } + return `${lines.join("\n")}\n`; +} + +function median(values) { + const sorted = [...values].sort((a, b) => a - b); + return sorted[Math.floor(sorted.length / 2)]; +} + +async function timed(fn) { + const start = performance.now(); + const value = await fn(); + return { value, ms: performance.now() - start }; +} + +function markdownTable(report) { + const lines = [ + "| edges | AST GFQL median ms | Cypher median ms | returned rows |", + "| ---: | ---: | ---: | ---: |", + ]; + for (const row of report.queries) { + lines.push( + `| ${row.edges} | ${row.astMedianMs.toFixed(1)} | ${row.cypherMedianMs.toFixed(1)} | ${row.rows} |`, + ); + } + return 
lines.join("\n"); +} + +async function main() { + const manifest = JSON.parse(await readFile(join(bundleDir, "manifest.json"), "utf8")); + const sizeReport = JSON.parse(await readFile(join(bundleDir, "size-report.json"), "utf8")); + const wheelPath = join(bundleDir, manifest.graphistryWheel.replace("./", "")); + const wheelData = new Uint8Array(await readFile(wheelPath)); + const pyodideModule = process.env.PYODIDE_MODULE || join(bundleDir, "pyodide/pyodide.mjs"); + if (/^https?:\/\//.test(pyodideModule)) { + throw new Error("benchmark-node.mjs needs a local Pyodide module. Build with --flavor self-hosted or set PYODIDE_MODULE to a local pyodide.mjs."); + } + const requirements = await Promise.all(manifest.requirements.map(async (requirement) => { + if (!requirement.startsWith("./")) { + return requirement; + } + const path = join(bundleDir, requirement.replace("./", "")); + return { + path: `/tmp/${path.split("/").pop()}`, + data: new Uint8Array(await readFile(path)), + }; + })); + + const importResult = await timed(() => import(pyodideModule)); + const runtimeResult = await timed(() => createGFQLRuntime({ + loadPyodide: importResult.value.loadPyodide, + indexURL: manifest.indexURL.startsWith("./") + ? `${join(bundleDir, manifest.indexURL.replace("./", ""))}/` + : manifest.indexURL, + packageBaseUrl: manifest.packageBaseUrl && manifest.packageBaseUrl.startsWith("./") + ? 
`${join(bundleDir, manifest.packageBaseUrl.replace("./", ""))}/` + : manifest.packageBaseUrl, + pyodidePackages: manifest.pyodidePackages, + requirements, + graphistryWheel: { + path: `/tmp/${wheelPath.split("/").pop()}`, + data: wheelData, + }, + })); + const runtime = runtimeResult.value; + + const warmCsv = generateCsv(10); + await runtime.runEdgeWeightAtLeast({ csv: warmCsv, minWeight: 3 }); + await runtime.runCypherCsv({ + csv: warmCsv, + query: "MATCH (a)-[e]->(b) WHERE e.weight >= 3 RETURN e", + }); + + const queries = []; + for (const edgeCount of sizes) { + const csv = generateCsv(edgeCount); + const astTimes = []; + const cypherTimes = []; + let rows = 0; + for (let i = 0; i < repeat; i += 1) { + const ast = await timed(() => runtime.runEdgeWeightAtLeast({ csv, minWeight: 3 })); + const cypher = await timed(() => runtime.runCypherCsv({ + csv, + query: "MATCH (a)-[e]->(b) WHERE e.weight >= 3 RETURN e", + })); + astTimes.push(ast.ms); + cypherTimes.push(cypher.ms); + rows = ast.value.edges.length; + } + queries.push({ + edges: edgeCount, + rows, + astMedianMs: median(astTimes), + cypherMedianMs: median(cypherTimes), + astMs: astTimes, + cypherMs: cypherTimes, + }); + } + + const report = { + pyodideVersion: manifest.pyodideVersion, + bundleBytes: sizeReport.totalBytes, + pyodideBytes: sizeReport.pyodideBytes, + wheelsBytes: sizeReport.wheelsBytes, + importPyodideModuleMs: importResult.ms, + createRuntimeMs: runtimeResult.ms, + repeat, + queries, + }; + + console.log(JSON.stringify(report, null, 2)); + console.log("\n" + markdownTable(report)); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/demos/gfql/pyodide/browser.html b/demos/gfql/pyodide/browser.html new file mode 100644 index 0000000000..dda1e3b470 --- /dev/null +++ b/demos/gfql/pyodide/browser.html @@ -0,0 +1,422 @@ + + + + + + GFQL Pyodide live demo + + + +
+
+

GFQL in the browser

+

Pyodide loads a local PyGraphistry wheel, reads CSV edges, and runs GFQL without a Python install or backend.

+
+ +
+
+

Input

+ +
+ +
+ +
+ + + +
+
+ +
+

Output

+
+ Ready +
+
+
+
+

Native GFQL edges

+
+
+
+

Cypher rows

+
+
+
+

JSON

+
{}
+
+
+
+
+
+ + + + diff --git a/demos/gfql/pyodide/build-bundle.mjs b/demos/gfql/pyodide/build-bundle.mjs new file mode 100644 index 0000000000..8b91700eca --- /dev/null +++ b/demos/gfql/pyodide/build-bundle.mjs @@ -0,0 +1,383 @@ +import { copyFile, cp, mkdir, readdir, readFile, rm, stat, writeFile } from "node:fs/promises"; +import { spawnSync } from "node:child_process"; +import { createHash } from "node:crypto"; +import { basename, dirname, join, relative, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +const PYODIDE_VERSION = "314.0.0"; +const PYODIDE_CDN_URL = `https://cdn.jsdelivr.net/pyodide/v${PYODIDE_VERSION}/full/`; +const BUNDLE_FLAVORS = new Set(["self-hosted", "cdn"]); +const PYODIDE_CORE_PACKAGES = [ + "micropip", + "pandas", +]; +const PYODIDE_REQUIREMENTS = [ + "requests", + "packaging", + "typing-extensions", +]; +const VENDORED_WHEEL_REQUIREMENTS = [ + "lark>=1.1,<2", +]; + +const scriptDir = dirname(fileURLToPath(import.meta.url)); +const repoRoot = resolve(scriptDir, "../../.."); +const defaultOutDir = "/tmp/pygraphistry-gfql-pyodide-bundle"; +const docsOutDir = join(repoRoot, "docs/source/static/gfql/pyodide"); + +function parseArgs(argv) { + const options = { + flavor: process.env.GFQL_PYODIDE_BUNDLE_FLAVOR || "self-hosted", + outDir: undefined, + }; + for (let i = 0; i < argv.length; i += 1) { + const arg = argv[i]; + if (arg === "--docs-static") { + options.outDir = docsOutDir; + } else if (arg === "--flavor") { + i += 1; + options.flavor = argv[i]; + } else if (arg.startsWith("--flavor=")) { + options.flavor = arg.slice("--flavor=".length); + } else if (arg === "--help" || arg === "-h") { + console.log([ + "Usage: node demos/gfql/pyodide/build-bundle.mjs [out-dir] [--docs-static] [--flavor self-hosted|cdn]", + "", + "Flavors:", + " self-hosted Copy Pyodide runtime and required package wheels into the bundle.", + " cdn Keep only demo files and wheels; load Pyodide 314 from the pinned CDN.", + ].join("\n")); + process.exit(0); 
+ } else if (arg.startsWith("--")) { + throw new Error(`Unknown option: ${arg}`); + } else if (!options.outDir) { + options.outDir = arg; + } else { + throw new Error(`Unexpected argument: ${arg}`); + } + } + if (!BUNDLE_FLAVORS.has(options.flavor)) { + throw new Error(`Unknown bundle flavor "${options.flavor}". Expected one of: ${[...BUNDLE_FLAVORS].join(", ")}`); + } + options.outDir = resolve(options.outDir || defaultOutDir); + return options; +} + +const buildOptions = parseArgs(process.argv.slice(2)); +const outDir = buildOptions.outDir; +const bundleFlavor = buildOptions.flavor; +const workDir = join(outDir, ".work"); +const srcCopy = join(workDir, "src"); +const pyodideNode = join(workDir, "node"); +const wheelDir = join(outDir, "wheels"); +const pyodideOutDir = join(outDir, "pyodide"); + +async function directorySize(path) { + const entry = await stat(path); + if (!entry.isDirectory()) { + return entry.size; + } + const children = await readdir(path); + let total = 0; + for (const child of children) { + total += await directorySize(join(path, child)); + } + return total; +} + +async function removeIfExists(path) { + await rm(path, { recursive: true, force: true }); +} + +async function prunePyodideRuntime(path) { + const removable = [ + "console.html", + "console-v2.html", + "ffi.d.ts", + "package.json", + "pyodide.asm.mjs.map", + "pyodide.d.ts", + "pyodide.js", + "pyodide.js.map", + "pyodide.mjs.map", + "README.md", + ]; + await Promise.all(removable.map((filename) => removeIfExists(join(path, filename)))); +} + +function sha256(bytes) { + return createHash("sha256").update(bytes).digest("hex"); +} + +function collectPyodidePackages(lockFile, packageNames) { + const packages = lockFile.packages || {}; + const seen = new Set(); + + function visit(packageName) { + if (seen.has(packageName)) { + return; + } + const metadata = packages[packageName]; + if (!metadata) { + throw new Error(`Pyodide lockfile does not include package: ${packageName}`); + } + 
seen.add(packageName); + for (const dependency of metadata.depends || []) { + visit(dependency); + } + } + + for (const packageName of packageNames) { + visit(packageName); + } + + return [...seen].sort(); +} + +async function fetchBytesWithRetry(url, attempts = 3) { + let lastError; + for (let attempt = 1; attempt <= attempts; attempt += 1) { + try { + const response = await fetch(url); + if (!response.ok) { + throw new Error(`${response.status} ${response.statusText}`); + } + return Buffer.from(await response.arrayBuffer()); + } catch (error) { + lastError = error; + if (attempt < attempts) { + await new Promise((resolvePromise) => setTimeout(resolvePromise, attempt * 1000)); + } + } + } + throw new Error(`Failed to download ${url}: ${lastError?.message || lastError}`); +} + +async function downloadBytes(url, outputPath) { + try { + const bytes = await fetchBytesWithRetry(url); + await writeFile(outputPath, bytes); + return bytes; + } catch (fetchError) { + const result = spawnSync("curl", [ + "-L", + "--fail", + "--silent", + "--show-error", + "--retry", "3", + "--output", outputPath, + url, + ], { + encoding: "utf8", + stdio: "pipe", + }); + if (result.status !== 0) { + throw new Error([ + `Failed to download ${url}`, + `node fetch: ${fetchError?.message || fetchError}`, + `curl: ${result.stderr || result.stdout}`, + ].join("\n")); + } + return readFile(outputPath); + } +} + +async function downloadPyodidePackages(pyodideDir, packageNames) { + const lockFile = JSON.parse(await readFile(join(pyodideDir, "pyodide-lock.json"), "utf8")); + const packages = lockFile.packages || {}; + const resolvedPackages = collectPyodidePackages(lockFile, packageNames); + const baseURL = `https://cdn.jsdelivr.net/pyodide/v${PYODIDE_VERSION}/full`; + + for (const packageName of resolvedPackages) { + const metadata = packages[packageName]; + const outputPath = join(pyodideDir, metadata.file_name); + let bytes; + try { + bytes = await readFile(outputPath); + } catch { + bytes = await 
downloadBytes(`${baseURL}/${metadata.file_name}`, outputPath); + } + if (sha256(bytes) !== metadata.sha256) { + throw new Error(`Checksum mismatch for ${metadata.file_name}`); + } + } + + return resolvedPackages; +} + +function run(command, args, options = {}) { + const result = spawnSync(command, args, { + cwd: options.cwd || repoRoot, + env: { ...process.env, ...options.env }, + encoding: "utf8", + stdio: options.capture ? "pipe" : "inherit", + }); + if (result.status !== 0) { + throw new Error(`${command} ${args.join(" ")} failed with exit ${result.status}`); + } + return result; +} + +function sourceCopyExcludes() { + const excludes = [ + ".git", + "plans", + "uv.lock", + "=2", + ]; + const relativeOutDir = relative(repoRoot, outDir); + if (relativeOutDir && relativeOutDir !== "." && !relativeOutDir.startsWith("..")) { + excludes.push(relativeOutDir, `${relativeOutDir}/***`); + } + return excludes.flatMap((pattern) => ["--exclude", pattern]); +} + +async function main() { + await rm(outDir, { recursive: true, force: true }); + await mkdir(wheelDir, { recursive: true }); + await mkdir(workDir, { recursive: true }); + + run("rsync", [ + "-a", + ...sourceCopyExcludes(), + "./", + `${srcCopy}/`, + ]); + + run("uv", [ + "run", + "--no-project", + "--with", "build", + "python", + "-m", "build", + "--wheel", + "--outdir", wheelDir, + srcCopy, + ]); + + run("uv", [ + "run", + "--no-project", + "--with", "pip", + "python", + "-m", "pip", + "download", + "--only-binary=:all:", + "--dest", wheelDir, + ...VENDORED_WHEEL_REQUIREMENTS, + ]); + + run("npm", [ + "install", + "--prefix", pyodideNode, + "--no-audit", + "--no-fund", + "--ignore-scripts", + `pyodide@${PYODIDE_VERSION}`, + ]); + const pyodidePackageDir = join(pyodideNode, "node_modules/pyodide"); + const vendoredPyodidePackages = await downloadPyodidePackages(pyodidePackageDir, [ + ...PYODIDE_CORE_PACKAGES, + ...PYODIDE_REQUIREMENTS, + ]); + + const graphistryWheel = run("bash", [ + "-lc", + `ls 
${JSON.stringify(wheelDir)}/graphistry-*.whl | head -1`, + ], { capture: true }).stdout.trim(); + const requirementWheelPaths = run("bash", [ + "-lc", + `find ${JSON.stringify(wheelDir)} -maxdepth 1 -name '*.whl' ! -name 'graphistry-*.whl' -print | sort`, + ], { capture: true }).stdout.trim().split("\n").filter(Boolean); + + run("node", [ + join(scriptDir, "run-node.mjs"), + graphistryWheel, + ], { + env: { + PYODIDE_MODULE: join(pyodideNode, "node_modules/pyodide/pyodide.mjs"), + GFQL_REQUIREMENT_WHEELS: requirementWheelPaths.join(":"), + }, + }); + + if (bundleFlavor === "self-hosted") { + await cp(join(pyodideNode, "node_modules/pyodide"), pyodideOutDir, { + recursive: true, + }); + await prunePyodideRuntime(pyodideOutDir); + } + + for (const filename of ["benchmark-node.mjs", "browser.html", "edges.csv", "gfql.js", "package.json", "run-node.mjs", "test-browser.mjs"]) { + await copyFile(join(scriptDir, filename), join(outDir, filename)); + } + + const wheelFiles = run("bash", [ + "-lc", + `find ${JSON.stringify(wheelDir)} -maxdepth 1 -name '*.whl' -printf '%f\\n' | sort`, + ], { capture: true }).stdout.trim().split("\n").filter(Boolean); + const graphistryWheelName = basename(graphistryWheel); + const requirementEntries = wheelFiles + .filter((filename) => filename !== graphistryWheelName) + .map((filename) => `./wheels/${filename}`); + + await writeFile(join(outDir, "manifest.json"), `${JSON.stringify({ + pyodideVersion: PYODIDE_VERSION, + flavor: bundleFlavor, + pyodideModule: bundleFlavor === "self-hosted" ? "./pyodide/pyodide.mjs" : `${PYODIDE_CDN_URL}pyodide.mjs`, + indexURL: bundleFlavor === "self-hosted" ? "./pyodide/" : PYODIDE_CDN_URL, + packageBaseUrl: bundleFlavor === "self-hosted" ? 
"./pyodide/" : PYODIDE_CDN_URL, + pyodidePackages: vendoredPyodidePackages, + graphistryWheel: `./wheels/${graphistryWheelName}`, + requirements: [ + ...requirementEntries, + ...PYODIDE_REQUIREMENTS, + ], + }, null, 2)}\n`); + + await writeFile(join(outDir, "README.txt"), [ + "GFQL Pyodide bundle", + "", + `Flavor: ${bundleFlavor}`, + `Built from ${relative(process.cwd(), repoRoot) || "."}`, + "", + "Serve locally:", + ` cd ${outDir}`, + " python -m http.server 8000", + " open http://localhost:8000/browser.html", + "", + "Node smoke:", + bundleFlavor === "self-hosted" + ? ` PYODIDE_MODULE=${join(outDir, "pyodide/pyodide.mjs")} node ${join(outDir, "run-node.mjs")} ${join(outDir, "wheels", graphistryWheelName)}` + : " Build with --flavor self-hosted for an offline Node smoke target, or use the browser smoke.", + "", + "Browser smoke:", + ` node ${join(outDir, "test-browser.mjs")} ${outDir}`, + "", + "Benchmark:", + ` node ${join(outDir, "benchmark-node.mjs")} ${outDir}`, + "", + ].join("\n")); + + await rm(workDir, { recursive: true, force: true }); + + const sizeReport = { + totalBytes: 0, + flavor: bundleFlavor, + pyodideBytes: bundleFlavor === "self-hosted" ? 
await directorySize(pyodideOutDir) : 0, + wheelsBytes: await directorySize(wheelDir), + generatedAt: new Date().toISOString(), + }; + await writeFile(join(outDir, "size-report.json"), `${JSON.stringify(sizeReport, null, 2)}\n`); + sizeReport.totalBytes = await directorySize(outDir); + await writeFile(join(outDir, "size-report.json"), `${JSON.stringify(sizeReport, null, 2)}\n`); + + const manifest = await readFile(join(outDir, "manifest.json"), "utf8"); + console.log(`\nBundle written to ${outDir}`); + console.log(manifest); + console.log(JSON.stringify(sizeReport, null, 2)); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/demos/gfql/pyodide/edges.csv b/demos/gfql/pyodide/edges.csv new file mode 100644 index 0000000000..180d666965 --- /dev/null +++ b/demos/gfql/pyodide/edges.csv @@ -0,0 +1,4 @@ +src,dst,weight +alice,bob,1 +bob,carol,2 +alice,carol,3 diff --git a/demos/gfql/pyodide/gfql.js b/demos/gfql/pyodide/gfql.js new file mode 100644 index 0000000000..9602f800d0 --- /dev/null +++ b/demos/gfql/pyodide/gfql.js @@ -0,0 +1,238 @@ +export const DEFAULT_PYODIDE_INDEX_URL = "https://cdn.jsdelivr.net/pyodide/v314.0.0/full/"; + +const DEFAULT_PYODIDE_PACKAGES = [ + "micropip", + "pandas", + "requests", + "packaging", + "typing-extensions", +]; +const DEFAULT_GRAPHISTRY_REQUIREMENTS = [ + "lark>=1.1,<2", +]; + +function setGlobals(pyodide, values) { + for (const [key, value] of Object.entries(values)) { + pyodide.globals.set(key, value); + } +} + +async function installGraphistryWheel(pyodide, graphistryWheel) { + if (!graphistryWheel) { + return; + } + + let wheelTarget = graphistryWheel; + if (typeof graphistryWheel !== "string") { + wheelTarget = graphistryWheel.path || "/tmp/graphistry-pyodide.whl"; + pyodide.FS.writeFile(wheelTarget, graphistryWheel.data); + setGlobals(pyodide, { _gfql_graphistry_wheel: wheelTarget }); + await pyodide.runPythonAsync(` +import sysconfig +import zipfile +from pathlib import 
PurePosixPath + +with zipfile.ZipFile(_gfql_graphistry_wheel) as _gfql_wheel: + for _gfql_member in _gfql_wheel.infolist(): + _gfql_path = PurePosixPath(_gfql_member.filename) + if _gfql_path.is_absolute() or ".." in _gfql_path.parts: + raise ValueError(f"Unsafe wheel member path: {_gfql_member.filename}") + _gfql_wheel.extractall(sysconfig.get_paths()["purelib"]) +`); + return; + } + + setGlobals(pyodide, { _gfql_graphistry_wheel: wheelTarget }); + await pyodide.runPythonAsync(` +import micropip +await micropip.install(_gfql_graphistry_wheel, deps=False) +`); +} + +function mountRequirementWheels(pyodide, requirements) { + return requirements.map((requirement, index) => { + if (typeof requirement === "string") { + return requirement; + } + const path = requirement.path || `/tmp/gfql-requirement-${index}.whl`; + pyodide.FS.writeFile(path, requirement.data); + return `emfs:${path}`; + }); +} + +async function retryAsync(fn, { attempts = 3, delayMs = 1000 } = {}) { + let lastError; + for (let attempt = 1; attempt <= attempts; attempt += 1) { + try { + return await fn(); + } catch (error) { + lastError = error; + if (attempt < attempts) { + await new Promise((resolve) => setTimeout(resolve, delayMs * attempt)); + } + } + } + throw lastError; +} + +export async function createGFQLRuntime({ + loadPyodide, + indexURL, + packageBaseUrl, + pyodidePackages = DEFAULT_PYODIDE_PACKAGES, + requirements = DEFAULT_GRAPHISTRY_REQUIREMENTS, + graphistryWheel, + stdout, + stderr, +} = {}) { + if (!loadPyodide) { + throw new Error("createGFQLRuntime requires a loadPyodide function"); + } + + const loadPyodideOptions = { stdout, stderr }; + if (indexURL) { + loadPyodideOptions.indexURL = indexURL; + } + if (packageBaseUrl) { + loadPyodideOptions.packageBaseUrl = packageBaseUrl; + } + + const pyodide = await loadPyodide(loadPyodideOptions); + await retryAsync(() => pyodide.loadPackage(pyodidePackages)); + + setGlobals(pyodide, { _gfql_requirements: mountRequirementWheels(pyodide, 
requirements) }); + await pyodide.runPythonAsync(` +import micropip +await micropip.install(_gfql_requirements) +`); + await installGraphistryWheel(pyodide, graphistryWheel); + + await pyodide.runPythonAsync(` +import copy +import pandas as pd +from graphistry.compute.ComputeMixin import ComputeMixin +from graphistry.compute import e, ge + +class GFQLMiniGraph(ComputeMixin): + def __init__(self): + super().__init__() + self._edges = None + self._nodes = None + self._source = None + self._destination = None + self._node = None + self._edge = None + + def bind(self, source=None, destination=None, node=None, edge=None, **kwargs): + out = copy.copy(self) + if source is not None: + out._source = source + if destination is not None: + out._destination = destination + if node is not None: + out._node = node + if edge is not None: + out._edge = edge + return out + + def edges(self, edges, source=None, destination=None, edge=None, **kwargs): + if callable(edges): + edges = edges(self) + out = self.bind(source=source, destination=destination, edge=edge) + out._edges = edges + return out + + def nodes(self, nodes, node=None, **kwargs): + if callable(nodes): + nodes = nodes(self) + out = self.bind(node=node) + out._nodes = nodes + return out + +def _gfql_graph(edges, nodes, source, destination, node): + return GFQLMiniGraph().edges(edges, source, destination).nodes(nodes, node) +`); + + return new GFQLRuntime(pyodide); +} + +export class GFQLRuntime { + constructor(pyodide) { + this.pyodide = pyodide; + } + + async runEdgeWeightAtLeast({ + csv, + source = "src", + destination = "dst", + weightColumn = "weight", + minWeight = 2, + }) { + setGlobals(this.pyodide, { + _gfql_csv: csv, + _gfql_source: source, + _gfql_destination: destination, + _gfql_weight_column: weightColumn, + _gfql_min_weight: minWeight, + }); + + const jsonText = await this.pyodide.runPythonAsync(` +import io +import json +import pandas as pd +from graphistry.compute import e, ge + +def _records(df): + if df 
is None: + return [] + return json.loads(df.to_json(orient="records")) + +_edges = pd.read_csv(io.StringIO(_gfql_csv)) +_node_ids = pd.unique(_edges[[_gfql_source, _gfql_destination]].to_numpy().ravel()) +_nodes = pd.DataFrame({"id": _node_ids}) +_graph = _gfql_graph(_edges, _nodes, _gfql_source, _gfql_destination, "id") +_result = _graph.gfql([e(edge_match={_gfql_weight_column: ge(_gfql_min_weight)})]) +json.dumps({ + "edges": _records(getattr(_result, "_edges", None)), + "nodes": _records(getattr(_result, "_nodes", None)), +}) +`); + return JSON.parse(jsonText); + } + + async runCypherCsv({ + csv, + query, + source = "src", + destination = "dst", + }) { + setGlobals(this.pyodide, { + _gfql_csv: csv, + _gfql_query: query, + _gfql_source: source, + _gfql_destination: destination, + }); + + const jsonText = await this.pyodide.runPythonAsync(` +import io +import json +import pandas as pd + +def _records(df): + if df is None: + return [] + return json.loads(df.to_json(orient="records")) + +_edges = pd.read_csv(io.StringIO(_gfql_csv)) +_node_ids = pd.unique(_edges[[_gfql_source, _gfql_destination]].to_numpy().ravel()) +_nodes = pd.DataFrame({"id": _node_ids}) +_graph = _gfql_graph(_edges, _nodes, _gfql_source, _gfql_destination, "id") +_result = _graph.gfql(_gfql_query, language="cypher") +json.dumps({ + "edges": _records(getattr(_result, "_edges", None)), + "nodes": _records(getattr(_result, "_nodes", None)), +}) +`); + return JSON.parse(jsonText); + } +} diff --git a/demos/gfql/pyodide/package-lock.json b/demos/gfql/pyodide/package-lock.json new file mode 100644 index 0000000000..3dc1973391 --- /dev/null +++ b/demos/gfql/pyodide/package-lock.json @@ -0,0 +1,59 @@ +{ + "name": "pyodide", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "devDependencies": { + "playwright": "1.60.0" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": 
"sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.60.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.60.0.tgz", + "integrity": "sha512-hheHdokM8cdqCb0lcE3s+zT4t4W+vvjpGxsZlDnikarzx8tSzMebh3UiFtgqwFwnTnjYQcsyMF8ei2mCO/tpeA==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.60.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.60.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.60.0.tgz", + "integrity": "sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/demos/gfql/pyodide/package.json b/demos/gfql/pyodide/package.json new file mode 100644 index 0000000000..3b00b8b6f2 --- /dev/null +++ b/demos/gfql/pyodide/package.json @@ -0,0 +1,12 @@ +{ + "type": "module", + "scripts": { + "build": "node build-bundle.mjs", + "build:cdn": "node build-bundle.mjs --flavor cdn", + "build:self-hosted": "node build-bundle.mjs --flavor self-hosted", + "test:browser": "node test-browser.mjs" + }, + "devDependencies": { + "playwright": "1.60.0" + } +} diff --git a/demos/gfql/pyodide/run-node.mjs b/demos/gfql/pyodide/run-node.mjs new file mode 100644 index 0000000000..9ab106c582 --- /dev/null +++ b/demos/gfql/pyodide/run-node.mjs @@ -0,0 +1,54 @@ +import { readFile } from "node:fs/promises"; +import { basename, dirname, resolve } from "node:path"; +import { createGFQLRuntime } from "./gfql.js"; + 
+const wheelPath = process.argv[2]; +if (!wheelPath) { + throw new Error("Usage: node demos/gfql/pyodide/run-node.mjs "); +} + +const pyodideModule = process.env.PYODIDE_MODULE || "pyodide"; +const pyodideIndexURL = process.env.PYODIDE_INDEX_URL + || (pyodideModule === "pyodide" ? undefined : dirname(resolve(pyodideModule))); +const { loadPyodide } = await import(pyodideModule); +const csv = await readFile(new URL("./edges.csv", import.meta.url), "utf8"); +const wheelBytes = await readFile(resolve(wheelPath)); +const requirementWheelPaths = (process.env.GFQL_REQUIREMENT_WHEELS || "") + .split(":") + .map((value) => value.trim()) + .filter(Boolean); +const requirements = await Promise.all(requirementWheelPaths.map(async (requirementPath) => ({ + path: `/tmp/${basename(requirementPath)}`, + data: await readFile(resolve(requirementPath)), +}))); + +const runtime = await createGFQLRuntime({ + loadPyodide, + ...(pyodideIndexURL ? { indexURL: pyodideIndexURL } : {}), + ...(requirements.length > 0 ? 
{ requirements } : {}), + graphistryWheel: { + path: `/tmp/${basename(wheelPath)}`, + data: wheelBytes, + }, +}); + +const astResult = await runtime.runEdgeWeightAtLeast({ csv, minWeight: 2 }); +if (astResult.edges.length !== 2) { + throw new Error(`Expected 2 AST GFQL edges, got ${astResult.edges.length}`); +} +if (!astResult.edges.every((edge) => edge.weight >= 2)) { + throw new Error(`Expected AST GFQL weights >= 2: ${JSON.stringify(astResult.edges)}`); +} + +const cypherResult = await runtime.runCypherCsv({ + csv, + query: "MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e", +}); +if (cypherResult.nodes.length !== 2) { + throw new Error(`Expected 2 Cypher rows, got ${cypherResult.nodes.length}`); +} + +console.log(JSON.stringify({ + astEdges: astResult.edges, + cypherRows: cypherResult.nodes, +}, null, 2)); diff --git a/demos/gfql/pyodide/test-browser.mjs b/demos/gfql/pyodide/test-browser.mjs new file mode 100644 index 0000000000..41c36ba375 --- /dev/null +++ b/demos/gfql/pyodide/test-browser.mjs @@ -0,0 +1,167 @@ +import { createServer } from "node:http"; +import { readFile, stat } from "node:fs/promises"; +import { extname, normalize, resolve, sep } from "node:path"; +import { pathToFileURL } from "node:url"; + +const bundleDir = resolve(process.argv[2] || "/tmp/pygraphistry-gfql-pyodide-bundle"); +const timeoutMs = Number(process.env.GFQL_BROWSER_TIMEOUT_MS || "120000"); +const screenshotPath = process.env.GFQL_BROWSER_SCREENSHOT; + +const contentTypes = new Map([ + [".csv", "text/csv; charset=utf-8"], + [".html", "text/html; charset=utf-8"], + [".js", "text/javascript; charset=utf-8"], + [".json", "application/json; charset=utf-8"], + [".mjs", "text/javascript; charset=utf-8"], + [".txt", "text/plain; charset=utf-8"], + [".wasm", "application/wasm"], + [".whl", "application/octet-stream"], + [".zip", "application/zip"], +]); + +async function fileExists(path) { + try { + return (await stat(path)).isFile(); + } catch { + return false; + } +} + +function 
resolveRequestPath(urlPath) { + const decoded = decodeURIComponent(urlPath.split("?")[0]); + const relativePath = normalize(decoded === "/" ? "browser.html" : decoded.slice(1)); + const absolutePath = resolve(bundleDir, relativePath); + if (absolutePath !== bundleDir && !absolutePath.startsWith(`${bundleDir}${sep}`)) { + return undefined; + } + return absolutePath; +} + +async function startServer() { + const server = createServer(async (request, response) => { + try { + const requestPath = resolveRequestPath(request.url || "/"); + if (!requestPath || !(await fileExists(requestPath))) { + response.writeHead(404, { "content-type": "text/plain; charset=utf-8" }); + response.end("not found"); + return; + } + const body = await readFile(requestPath); + response.writeHead(200, { + "content-length": body.length, + "content-type": contentTypes.get(extname(requestPath)) || "application/octet-stream", + }); + if (request.method === "HEAD") { + response.end(); + } else { + response.end(body); + } + } catch (error) { + response.writeHead(500, { "content-type": "text/plain; charset=utf-8" }); + response.end(String(error.stack || error.message || error)); + } + }); + + await new Promise((resolvePromise) => server.listen(0, "127.0.0.1", resolvePromise)); + const address = server.address(); + return { + server, + baseURL: `http://127.0.0.1:${address.port}`, + }; +} + +async function importPlaywright() { + const candidates = [ + process.env.PLAYWRIGHT_MODULE, + "playwright", + pathToFileURL(resolve(bundleDir, "node_modules/playwright/index.mjs")).href, + pathToFileURL(resolve(process.cwd(), "demos/gfql/pyodide/node_modules/playwright/index.mjs")).href, + pathToFileURL(resolve(process.cwd(), "node_modules/playwright/index.mjs")).href, + ].filter(Boolean); + + const errors = []; + for (const candidate of candidates) { + if (candidate.startsWith("file://") && !(await fileExists(new URL(candidate)))) { + continue; + } + try { + return await import(candidate); + } catch (error) { + 
errors.push(`${candidate}: ${error.message}`); + } + } + + throw new Error([ + "Playwright is not installed or could not be imported.", + "Run `npm install --prefix demos/gfql/pyodide` or set PLAYWRIGHT_MODULE.", + ...errors, + ].join("\n")); +} + +async function main() { + const { chromium } = await importPlaywright(); + + const { server, baseURL } = await startServer(); + const browser = await chromium.launch({ + headless: true, + args: ["--no-sandbox"], + }); + const page = await browser.newPage(); + page.setDefaultTimeout(timeoutMs); + + const pageErrors = []; + page.on("pageerror", (error) => pageErrors.push(error.message)); + page.on("console", (message) => { + if (message.type() === "error") { + pageErrors.push(message.text()); + } + }); + + try { + await page.goto(`${baseURL}/browser.html`, { waitUntil: "domcontentloaded" }); + await page.waitForFunction(() => { + const raw = document.querySelector("#jsonOutput")?.textContent || ""; + try { + const parsed = JSON.parse(raw); + return parsed.astEdges?.length === 3 && parsed.cypherRows?.length === 3; + } catch { + return false; + } + }); + + const result = await page.evaluate(() => JSON.parse(document.querySelector("#jsonOutput").textContent)); + const status = await page.textContent("#status"); + const metrics = await page.$$eval("#metrics tbody tr", (rows) => rows.map((row) => row.textContent.trim())); + + if (!status.includes("Ready")) { + throw new Error(`Expected Ready status, got: ${status}`); + } + if (!metrics.some((text) => text.includes("Create Pyodide GFQL runtime"))) { + throw new Error(`Expected runtime creation metric, got: ${JSON.stringify(metrics)}`); + } + if (!result.astEdges.every((edge) => edge.weight >= 2)) { + throw new Error(`Expected AST edge weights >= 2, got: ${JSON.stringify(result.astEdges)}`); + } + if (pageErrors.length > 0) { + throw new Error(`Browser console/page errors:\n${pageErrors.join("\n")}`); + } + if (screenshotPath) { + await page.screenshot({ path: screenshotPath, 
fullPage: true }); + } + console.log(JSON.stringify({ + ok: true, + url: `${baseURL}/browser.html`, + astEdges: result.astEdges.length, + cypherRows: result.cypherRows.length, + metrics, + }, null, 2)); + } finally { + await browser.close(); + await new Promise((resolvePromise) => server.close(resolvePromise)); + } +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/docs/source/gfql/index.rst b/docs/source/gfql/index.rst index 8362feb6f7..c6547406dd 100644 --- a/docs/source/gfql/index.rst +++ b/docs/source/gfql/index.rst @@ -56,6 +56,7 @@ See also: combo quick cypher + pyodide where return predicates/quick diff --git a/docs/source/gfql/pyodide.rst b/docs/source/gfql/pyodide.rst new file mode 100644 index 0000000000..ccf5cc280b --- /dev/null +++ b/docs/source/gfql/pyodide.rst @@ -0,0 +1,293 @@ +GFQL in Pyodide +================ + +This page shows how to run a small GFQL workload in JavaScript with +`Pyodide <https://pyodide.org/>`__. + +Why Pyodide 314? +---------------- + +Pyodide 314 aligns Pyodide versioning with Python 3.14, ships as a native ES +module runtime, and standardizes package publishing around PyEmscripten wheels. +That makes it a good baseline for a browser-side GFQL proof: the JavaScript +entrypoint can import Pyodide as an ES module, load Pyodide-native packages +such as ``pandas`` and ``pyarrow``, and install pure Python wheels at runtime. + +For this GFQL demo, the important runtime pieces are: + +- Pyodide packages: ``micropip``, ``pandas``, ``requests``, ``packaging``, and + ``typing-extensions``. +- Pure Python wheels: ``lark`` for the local Cypher parser. +- A pure Python wheel for this repository. + +The browser demo intentionally avoids the top-level ``graphistry.edges(...)`` +constructor and uses a small in-Pyodide graph object backed by +``ComputeMixin``. That keeps the live demo on a pandas-only path. 
``pyarrow`` +is still useful for normal PyGraphistry upload and Arrow workflows, but it is +not loaded for this CSV/GFQL browser proof. + +Build the bundle +---------------- + +From the repository root: + +.. code-block:: bash + + node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-bundle + +Choose a bundle flavor: + +- ``self-hosted`` copies Pyodide, Python stdlib, and required Pyodide wheels + into the bundle. It is the most reproducible/offline option. +- ``cdn`` publishes only the demo files plus Graphistry/``lark`` wheels and + loads the pinned Pyodide 314 runtime/packages from jsDelivr. It keeps the + hosted artifact small, but first cold load still downloads Pyodide and + ``pandas`` from the CDN. + +.. code-block:: bash + + node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn + node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-self-hosted --flavor self-hosted + +For Read the Docs, build the small CDN-backed flavor into the Sphinx static tree +before the HTML build: + +.. code-block:: bash + + node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn + +This writes the live demo under ``docs/source/static/gfql/pyodide/``. The +directory is ignored by git because it contains generated wheels and static +assets; publish it as a generated docs artifact, not as checked-in source. +The Read the Docs build runs this command before the HTML build so the live +page is included in the published ``_static`` directory. + +The builder: + +1. Copies the repo to a temporary build directory. +2. Builds a pure Python ``graphistry`` wheel. +3. Downloads wheels that Pyodide does not ship directly, currently ``lark``. +4. Installs ``pyodide@314.0.0``, resolves the required Pyodide package closure + from the Pyodide lockfile, and verifies checksums for downloaded wheels. +5. 
Writes a static bundle containing ``pyodide/``, ``gfql.js``, ``edges.csv``, + ``browser.html``, ``manifest.json``, ``size-report.json``, and wheels under + ``wheels/``. The ``cdn`` flavor omits ``pyodide/`` and points the manifest + at the pinned CDN runtime instead. +6. Prunes non-runtime Pyodide files such as source maps, legacy console pages, + and TypeScript declarations. + +Try it live +----------- + +When the generated static bundle is published with the docs, open the live +browser demo: + +.. raw:: html + +

+ + <a href="../_static/gfql/pyodide/browser.html">Try GFQL in your browser</a> +

+ +The live page runs entirely in the browser: it loads Pyodide, installs the +bundled Graphistry wheel and small pure-Python dependencies, reads the sample +CSV, then executes both a native GFQL predicate query and a Cypher-style +``MATCH`` query. The page includes editable CSV, editable Cypher, rendered +tables, raw JSON, and per-step timing. + +If the link returns 404 in a local docs build, generate the static bundle first +with ``node demos/gfql/pyodide/build-bundle.mjs --docs-static`` and rebuild the +HTML docs. + +Run the browser tutorial +------------------------ + +Serve the generated directory as static files: + +.. code-block:: bash + + cd /tmp/pygraphistry-gfql-pyodide-bundle + python -m http.server 8000 + +Then open: + +.. code-block:: text + + http://localhost:8000/browser.html + +The page loads ``pyodide/pyodide.mjs``, installs the local wheels listed in +``manifest.json``, loads ``edges.csv``, and runs: + +.. code-block:: javascript + + const astResult = await runtime.runEdgeWeightAtLeast({ csv, minWeight: 2 }); + const cypherResult = await runtime.runCypherCsv({ + csv, + query: "MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e", + }); + +The expected output is: + +.. code-block:: json + + { + "astEdges": [ + {"src": "bob", "dst": "carol", "weight": 2}, + {"src": "alice", "dst": "carol", "weight": 3} + ], + "cypherRows": [ + {"e": "[{weight: 2}]"}, + {"e": "[{weight: 3}]"} + ] + } + +Run the Node smoke +------------------ + +The same bundle can be checked in Node: + +.. code-block:: bash + + PYODIDE_MODULE=/tmp/pygraphistry-gfql-pyodide-bundle/pyodide/pyodide.mjs \ + node /tmp/pygraphistry-gfql-pyodide-bundle/run-node.mjs \ + /tmp/pygraphistry-gfql-pyodide-bundle/wheels/graphistry-0+unknown-py3-none-any.whl + +Benchmark and size report +------------------------- + +The builder writes ``size-report.json``. Recent local builds of this demo +reported: + +.. 
list-table:: + :header-rows: 1 + + * - Flavor + - Bundle part + - Approximate bytes + - Approximate size + * - ``cdn`` + - Total static bundle + - 975,000 + - 0.9 MiB + * - ``cdn`` + - Pyodide runtime and package cache + - 0 + - CDN-backed + * - ``cdn`` + - Graphistry and ``lark`` wheels + - 943,000 + - 0.9 MiB + * - ``self-hosted`` + - Total static bundle + - 23,126,000 + - 22.0 MiB + * - ``self-hosted`` + - Pyodide runtime and package cache + - 22,151,080 + - 21.1 MiB + * - ``self-hosted`` + - Graphistry and ``lark`` wheels + - 943,000 + - 0.9 MiB + +Run the benchmark: + +.. code-block:: bash + + node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \ + /tmp/pygraphistry-gfql-pyodide-bundle + +Set ``GFQL_BENCH_SIZES`` and ``GFQL_BENCH_REPEAT`` to change the workload: + +.. code-block:: bash + + GFQL_BENCH_SIZES=10,1000,10000 GFQL_BENCH_REPEAT=3 \ + node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \ + /tmp/pygraphistry-gfql-pyodide-bundle + +Example local Node timings after one warmup run: + +.. list-table:: + :header-rows: 1 + + * - Edges + - Native GFQL median + - Cypher median + - Returned rows + * - 10 + - 43.0 ms + - 95.9 ms + - 4 + * - 1,000 + - 51.2 ms + - 105.7 ms + - 400 + * - 10,000 + - 138.2 ms + - 179.4 ms + - 4,000 + +In the same run, creating the Pyodide GFQL runtime took about 6.82 seconds. +Browser numbers will vary with network, cache state, CPU, and whether the +server compresses static assets. + +Run the browser smoke +--------------------- + +The browser smoke starts a local static server, opens ``browser.html`` in +Chromium, waits for the auto-run output, and checks that both native GFQL and +Cypher return the expected rows: + +.. 
code-block:: bash + + npm install --prefix demos/gfql/pyodide --no-audit --no-fund + npm exec --prefix demos/gfql/pyodide -- playwright install chromium + node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn + node demos/gfql/pyodide/test-browser.mjs /tmp/gfql-cdn + +CI runs the same smoke against the ``cdn`` flavor. A recent cold CDN browser +run took about 46.6 seconds to create the Pyodide runtime, then 78 ms for the +native GFQL query and 487 ms for the Cypher query. Warm browser/cache behavior +should be faster; keep release numbers benchmark-driven. + +Hosting and versioning +---------------------- + +The generated RTD demo uses the ``cdn`` flavor under +``_static/gfql/pyodide/``. That keeps the published docs artifact small while +pinning all Pyodide URLs to ``v314.0.0``. + +For standalone apps, two patterns are reasonable: + +- **Self-hosted bundle**: publish the generated directory with the app or docs. + This is the most reproducible option and works offline after the first page + load if the browser cache keeps the assets. +- **Pinned CDN runtime**: load Pyodide from + ``https://cdn.jsdelivr.net/pyodide/v314.0.0/full/`` and host only + ``browser.html``, ``gfql.js``, the Graphistry wheel, ``lark``, and the + manifest. Pyodide's docs list this versioned jsDelivr URL as the cached + release CDN path. + +In either mode, keep ``manifest.json`` pinned to the Pyodide version and the +Graphistry wheel built for the release. Static hosts must serve ``.wasm`` with +the WebAssembly MIME type; Python's local ``http.server`` and many static hosts +do this correctly. + +Implementation notes +-------------------- + +- ``gfql.js`` accepts a browser URL wheel or byte-mounted wheel data. URL + wheels use ``micropip.install(url, deps=False)``. Byte-mounted local wheels + are extracted into Pyodide ``purelib`` after validating wheel member paths. +- ``gfql.js`` also accepts byte-mounted dependency wheels. 
In Node, those are + installed through Pyodide's ``emfs:`` wheel URI support. +- The bundle manifest points ``lark`` at a local wheel and records the resolved + Pyodide package closure. The ``self-hosted`` flavor serves those wheels from + ``pyodide/``; the ``cdn`` flavor serves them from the pinned Pyodide CDN. +- The demo binds a small ``id`` nodes table derived from CSV endpoints before + running GFQL. This avoids pandas 3.0 concat edge cases in Pyodide when + Graphistry has to synthesize nodes. +- The Cypher example intentionally uses ``RETURN e`` because broader + multi-alias Cypher projection is outside the currently supported local GFQL + compiler subset. diff --git a/docs/source/static/gfql/.gitignore b/docs/source/static/gfql/.gitignore new file mode 100644 index 0000000000..dfbd8300e8 --- /dev/null +++ b/docs/source/static/gfql/.gitignore @@ -0,0 +1 @@ +/pyodide/ diff --git a/graphistry/ArrowFileUploader.py b/graphistry/ArrowFileUploader.py index 55c1af01cf..748304d822 100644 --- a/graphistry/ArrowFileUploader.py +++ b/graphistry/ArrowFileUploader.py @@ -1,9 +1,16 @@ +from __future__ import annotations + import sys, threading, hashlib from typing import Any, Optional, Dict, Tuple -import pyarrow as pa -import pyarrow.ipc as pa_ipc import requests +try: + import pyarrow as pa + import pyarrow.ipc as pa_ipc +except ImportError: + pa = None + pa_ipc = None + from graphistry.utils.requests import log_requests_error from graphistry.otel import inject_trace_headers from .util import setup_logger diff --git a/graphistry/Engine.py b/graphistry/Engine.py index bc39199c49..e97015c999 100644 --- a/graphistry/Engine.py +++ b/graphistry/Engine.py @@ -2,11 +2,19 @@ import warnings import numpy as np import pandas as pd -import pyarrow as pa from typing import Any, List, Optional, Union from typing_extensions import Literal from enum import Enum +try: + import pyarrow as pa +except ImportError: + pa = None + + +def _is_pyarrow_table(value: Any) -> bool: + return pa is 
not None and isinstance(value, pa.Table) + class Engine(Enum): PANDAS = 'pandas' @@ -72,7 +80,7 @@ def resolve_engine( return Engine.PANDAS # Arrow and Spark are input formats, not compute engines — coerce to pandas at call sites - if isinstance(g_or_df, pa.Table): + if _is_pyarrow_table(g_or_df): return Engine.PANDAS try: @@ -167,7 +175,7 @@ def df_to_engine(df, engine: Engine): if engine == Engine.PANDAS: if isinstance(df, pd.DataFrame): return df - if isinstance(df, pa.Table): + if _is_pyarrow_table(df): return df.to_pandas() type_module = str(type(df).__module__) if 'pyspark' in type_module: diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index c512b734e0..9367390a13 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from graphistry.Plottable import Plottable, RenderModes, RenderModesConcrete from typing import Any, Callable, Dict, List, Optional, Union, Tuple, cast, overload, TYPE_CHECKING from typing_extensions import Literal @@ -10,10 +12,15 @@ from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.render.resolve_render_mode import resolve_render_mode from graphistry.Engine import Engine, EngineAbstractType, df_to_engine -import copy, hashlib, numpy as np, pandas as pd, pyarrow as pa, sys, uuid, warnings +import copy, hashlib, numpy as np, pandas as pd, sys, uuid, warnings from functools import lru_cache, partialmethod from weakref import WeakValueDictionary +try: + import pyarrow as pa +except ImportError: + pa = None + from graphistry.privacy import Privacy, Mode, ModeAction from graphistry.client_session import ( ClientSession, @@ -3018,7 +3025,7 @@ def make_arrow_upload(edges: Any, upload_nodes: Any) -> ArrowUploader: return g._make_arrow_dataset(edges=edges_arr, nodes=nodes_arr, name=name, description=description, metadata=metadata) if isinstance(graph, pd.DataFrame) \ - or isinstance(graph, pa.Table) \ + or (pa is not None and 
isinstance(graph, pa.Table)) \ or ( not (maybe_cudf() is None) and isinstance(graph, maybe_cudf().DataFrame) ) \ or ( not (maybe_dask_cudf() is None) and isinstance(graph, maybe_dask_cudf().DataFrame) ) \ or ( not (maybe_dask_dataframe() is None) and isinstance(graph, maybe_dask_dataframe().DataFrame) ) \ diff --git a/graphistry/arrow_uploader.py b/graphistry/arrow_uploader.py index 91cb6b5f63..6e7deb3f9c 100644 --- a/graphistry/arrow_uploader.py +++ b/graphistry/arrow_uploader.py @@ -1,6 +1,13 @@ +from __future__ import annotations + from typing import List, Optional, Dict, Any -import base64, io, json, pyarrow as pa, requests, sys +import base64, io, json, requests, sys + +try: + import pyarrow as pa +except ImportError: + pa = None from graphistry.privacy import Mode, Privacy, ModeAction from graphistry.otel import inject_trace_headers