diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 38a81f13b6..a58e4d845f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -388,6 +388,43 @@ jobs:
exit 1
fi
+ # Headless-Chromium smoke test for the GFQL Pyodide demo: builds the CDN-flavor
+ # bundle, then runs the bundle's own test-browser.mjs against it.
+ gfql-pyodide-browser:
+   needs: changes
+   if: ${{ needs.changes.outputs.docs == 'true' || needs.changes.outputs.gfql == 'true' || needs.changes.outputs.infra == 'true' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }}
+   runs-on: ubuntu-latest
+   timeout-minutes: 15
+   steps:
+     - uses: actions/checkout@v4
+       with:
+         persist-credentials: false
+
+     - name: Set up Python 3.12
+       uses: actions/setup-python@v5
+       with:
+         python-version: "3.12"
+
+     - name: Set up Node.js 20
+       uses: actions/setup-node@v4
+       with:
+         node-version: "20"
+
+     # build-bundle.mjs shells out to `uv` to build and download wheels.
+     - name: Install uv
+       run: python -m pip install "uv==0.11.3"
+
+     # `npm ci` installs exactly what the committed package-lock.json pins and
+     # fails if package.json and the lockfile disagree.
+     - name: Install browser test dependencies
+       run: npm ci --prefix demos/gfql/pyodide --no-audit --no-fund
+
+     - name: Install Chromium
+       run: npm exec --prefix demos/gfql/pyodide -- playwright install --with-deps chromium
+
+     - name: Build GFQL Pyodide CDN bundle
+       run: node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-browser --flavor cdn
+
+     - name: Browser smoke
+       env:
+         GFQL_BROWSER_SCREENSHOT: /tmp/gfql-pyodide-browser.png
+       run: node /tmp/pygraphistry-gfql-pyodide-browser/test-browser.mjs /tmp/pygraphistry-gfql-pyodide-browser
+
+     # Keep the screenshot after the runner is torn down so failures can be
+     # inspected. NOTE(review): assumes test-browser.mjs writes a screenshot to
+     # GFQL_BROWSER_SCREENSHOT - confirm; a missing file is ignored.
+     - name: Upload browser smoke screenshot
+       if: ${{ always() }}
+       uses: actions/upload-artifact@v4
+       with:
+         name: gfql-pyodide-browser-screenshot
+         path: /tmp/gfql-pyodide-browser.png
+         if-no-files-found: ignore
+
check-spark-lockfile:
needs: changes
if: ${{ needs.changes.outputs.spark == 'true' || needs.changes.outputs.infra == 'true' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }}
diff --git a/.readthedocs.yml b/.readthedocs.yml
index caa5cc10bf..4ce955dc4c 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -9,6 +9,7 @@ build:
os: ubuntu-22.04
tools:
python: "3.12"
+ nodejs: "20"
apt_packages:
# System dependencies - now works because we use jobs instead of commands
# More closely mirror https://github.com/sphinx-doc/sphinx-docker-images
@@ -42,6 +43,7 @@ build:
- cp DEVELOP.md docs/source/DEVELOP.md
build:
html:
+ - node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn
- sphinx-build -b html -d docs/doctrees docs/source $READTHEDOCS_OUTPUT/html/
epub:
- sphinx-build -b epub -d docs/doctrees docs/source docs/_build/epub
diff --git a/demos/gfql/pyodide/README.md b/demos/gfql/pyodide/README.md
new file mode 100644
index 0000000000..2278b68e89
--- /dev/null
+++ b/demos/gfql/pyodide/README.md
@@ -0,0 +1,90 @@
+# Pyodide GFQL proof
+
+This is a small `gfql.js` proof for running PyGraphistry GFQL inside Pyodide.
+
+It uses:
+
+- Pyodide `pandas`, `requests`, `packaging`, and `typing-extensions` packages.
+- `micropip` for the pure Python `lark` runtime dependency used by the Cypher parser.
+- A pure Python wheel for this repo, installed into Pyodide with `deps=False` after the runtime deps are already present.
+
+For a browser URL wheel, `gfql.js` uses `micropip.install(url, deps=False)`. For a Node byte-mounted local wheel, it writes the wheel into Pyodide FS and extracts it into `purelib`; Pyodide/Node `fetch` does not resolve Pyodide FS paths as URLs.
+
+Build a wheel from a writable copy of the repo:
+
+```bash
+rm -rf /tmp/pygraphistry-pyodide-src /tmp/pygraphistry-pyodide-dist
+rsync -a --exclude .git --exclude plans --exclude uv.lock --exclude '=2' ./ /tmp/pygraphistry-pyodide-src/
+uv run --no-project --with build python -m build --wheel --outdir /tmp/pygraphistry-pyodide-dist /tmp/pygraphistry-pyodide-src
+```
+
+Run the Node smoke proof:
+
+```bash
+rm -rf /tmp/pygraphistry-pyodide-node
+npm install --prefix /tmp/pygraphistry-pyodide-node pyodide@314.0.0
+PYODIDE_MODULE=/tmp/pygraphistry-pyodide-node/node_modules/pyodide/pyodide.mjs node demos/gfql/pyodide/run-node.mjs /tmp/pygraphistry-pyodide-dist/graphistry-0+unknown-py3-none-any.whl
+```
+
+The smoke uses `edges.csv` and validates both:
+
+- AST GFQL: `e(edge_match={"weight": ge(2)})`, returning two filtered edges.
+- Cypher parser path: `MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e`, returning two projected rows.
+
+Both paths bind a small `id` nodes table derived from the CSV endpoints before running GFQL. That avoids pandas 3.0.2 concat edge cases in the current Pyodide runtime when Graphistry has to synthesize nodes.
+
+Build a static Pyodide 314 bundle:
+
+```bash
+node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-bundle
+```
+
+The builder supports two flavors:
+
+- `self-hosted`: copies Pyodide, Python stdlib, and required Pyodide wheels into
+ the bundle. This is the most reproducible/offline option and is about 22 MiB.
+- `cdn`: keeps only the demo files plus Graphistry/`lark` wheels, and loads the
+ pinned Pyodide 314 runtime/packages from jsDelivr. This is the smallest hosted
+ artifact and is about 1 MiB, but first cold load still downloads Pyodide and
+ pandas from the CDN.
+
+```bash
+node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn
+node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-self-hosted --flavor self-hosted
+```
+
+To generate the Read the Docs "Try it live" payload before a Sphinx HTML build:
+
+```bash
+node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn
+```
+
+That writes the bundle to `docs/source/static/gfql/pyodide/`, which is ignored
+by git because it contains generated docs artifacts and local wheels.
+
+The bundle includes `gfql.js`, `browser.html`, `edges.csv`, `manifest.json`,
+`size-report.json`, and wheels under `wheels/`. The `self-hosted` flavor also
+includes `pyodide/`. Serve it with:
+
+```bash
+cd /tmp/pygraphistry-gfql-pyodide-bundle
+python -m http.server 8000
+```
+
+Then open `http://localhost:8000/browser.html`.
+
+Run the browser smoke:
+
+```bash
+npm install --prefix demos/gfql/pyodide --no-audit --no-fund
+npm exec --prefix demos/gfql/pyodide -- playwright install chromium
+node demos/gfql/pyodide/test-browser.mjs /tmp/pygraphistry-gfql-pyodide-bundle
+```
+
+Benchmark it:
+
+```bash
+GFQL_BENCH_SIZES=10,1000,10000 GFQL_BENCH_REPEAT=3 \
+ node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \
+ /tmp/pygraphistry-gfql-pyodide-bundle
+```
diff --git a/demos/gfql/pyodide/benchmark-node.mjs b/demos/gfql/pyodide/benchmark-node.mjs
new file mode 100644
index 0000000000..69160d92e6
--- /dev/null
+++ b/demos/gfql/pyodide/benchmark-node.mjs
@@ -0,0 +1,134 @@
+import { readFile } from "node:fs/promises";
+import { join, resolve } from "node:path";
+import { performance } from "node:perf_hooks";
+import { createGFQLRuntime } from "./gfql.js";
+
+const bundleDir = resolve(process.argv[2] || "/tmp/pygraphistry-gfql-pyodide-bundle");
+const sizes = (process.env.GFQL_BENCH_SIZES || "10,1000,10000")
+ .split(",")
+ .map((value) => Number(value.trim()))
+ .filter((value) => Number.isFinite(value) && value > 0);
+const repeat = Number(process.env.GFQL_BENCH_REPEAT || "3");
+
+function generateCsv(edgeCount) {
+ const lines = ["src,dst,weight"];
+ for (let i = 0; i < edgeCount; i += 1) {
+ lines.push(`n${i},n${i + 1},${i % 5}`);
+ }
+ return `${lines.join("\n")}\n`;
+}
+
+function median(values) {
+ const sorted = [...values].sort((a, b) => a - b);
+ return sorted[Math.floor(sorted.length / 2)];
+}
+
+async function timed(fn) {
+ const start = performance.now();
+ const value = await fn();
+ return { value, ms: performance.now() - start };
+}
+
+function markdownTable(report) {
+ const lines = [
+ "| edges | AST GFQL median ms | Cypher median ms | returned rows |",
+ "| ---: | ---: | ---: | ---: |",
+ ];
+ for (const row of report.queries) {
+ lines.push(
+ `| ${row.edges} | ${row.astMedianMs.toFixed(1)} | ${row.cypherMedianMs.toFixed(1)} | ${row.rows} |`,
+ );
+ }
+ return lines.join("\n");
+}
+
+ /**
+  * Benchmark entry point: read the bundle's manifest and size report, create a
+  * GFQL runtime from the bundle's local wheels, time the AST and Cypher queries
+  * for every configured size, then print a JSON report and a Markdown table.
+  */
+ async function main() {
+ const manifest = JSON.parse(await readFile(join(bundleDir, "manifest.json"), "utf8"));
+ const sizeReport = JSON.parse(await readFile(join(bundleDir, "size-report.json"), "utf8"));
+ // Manifest paths are bundle-relative ("./..."); resolve them against bundleDir.
+ const wheelPath = join(bundleDir, manifest.graphistryWheel.replace("./", ""));
+ const wheelData = new Uint8Array(await readFile(wheelPath));
+ const pyodideModule = process.env.PYODIDE_MODULE || join(bundleDir, "pyodide/pyodide.mjs");
+ if (/^https?:\/\//.test(pyodideModule)) {
+ throw new Error("benchmark-node.mjs needs a local Pyodide module. Build with --flavor self-hosted or set PYODIDE_MODULE to a local pyodide.mjs.");
+ }
+ // Bundle-relative requirement wheels are read into memory and given a /tmp path
+ // for the runtime to write them to; other requirement entries pass through as strings.
+ const requirements = await Promise.all(manifest.requirements.map(async (requirement) => {
+ if (!requirement.startsWith("./")) {
+ return requirement;
+ }
+ const path = join(bundleDir, requirement.replace("./", ""));
+ return {
+ path: `/tmp/${path.split("/").pop()}`,
+ data: new Uint8Array(await readFile(path)),
+ };
+ }));
+
+ // Time the module import and the runtime creation separately for the report.
+ const importResult = await timed(() => import(pyodideModule));
+ const runtimeResult = await timed(() => createGFQLRuntime({
+ loadPyodide: importResult.value.loadPyodide,
+ indexURL: manifest.indexURL.startsWith("./")
+ ? `${join(bundleDir, manifest.indexURL.replace("./", ""))}/`
+ : manifest.indexURL,
+ packageBaseUrl: manifest.packageBaseUrl && manifest.packageBaseUrl.startsWith("./")
+ ? `${join(bundleDir, manifest.packageBaseUrl.replace("./", ""))}/`
+ : manifest.packageBaseUrl,
+ pyodidePackages: manifest.pyodidePackages,
+ requirements,
+ graphistryWheel: {
+ path: `/tmp/${wheelPath.split("/").pop()}`,
+ data: wheelData,
+ },
+ }));
+ const runtime = runtimeResult.value;
+
+ // Untimed warm-up run of both query paths before measuring.
+ const warmCsv = generateCsv(10);
+ await runtime.runEdgeWeightAtLeast({ csv: warmCsv, minWeight: 3 });
+ await runtime.runCypherCsv({
+ csv: warmCsv,
+ query: "MATCH (a)-[e]->(b) WHERE e.weight >= 3 RETURN e",
+ });
+
+ const queries = [];
+ for (const edgeCount of sizes) {
+ const csv = generateCsv(edgeCount);
+ const astTimes = [];
+ const cypherTimes = [];
+ let rows = 0;
+ for (let i = 0; i < repeat; i += 1) {
+ const ast = await timed(() => runtime.runEdgeWeightAtLeast({ csv, minWeight: 3 }));
+ const cypher = await timed(() => runtime.runCypherCsv({
+ csv,
+ query: "MATCH (a)-[e]->(b) WHERE e.weight >= 3 RETURN e",
+ }));
+ astTimes.push(ast.ms);
+ cypherTimes.push(cypher.ms);
+ // The reported row count is taken from the AST result's edges.
+ rows = ast.value.edges.length;
+ }
+ queries.push({
+ edges: edgeCount,
+ rows,
+ astMedianMs: median(astTimes),
+ cypherMedianMs: median(cypherTimes),
+ astMs: astTimes,
+ cypherMs: cypherTimes,
+ });
+ }
+
+ const report = {
+ pyodideVersion: manifest.pyodideVersion,
+ bundleBytes: sizeReport.totalBytes,
+ pyodideBytes: sizeReport.pyodideBytes,
+ wheelsBytes: sizeReport.wheelsBytes,
+ importPyodideModuleMs: importResult.ms,
+ createRuntimeMs: runtimeResult.ms,
+ repeat,
+ queries,
+ };
+
+ console.log(JSON.stringify(report, null, 2));
+ console.log("\n" + markdownTable(report));
+ }
+
+main().catch((error) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/demos/gfql/pyodide/browser.html b/demos/gfql/pyodide/browser.html
new file mode 100644
index 0000000000..dda1e3b470
--- /dev/null
+++ b/demos/gfql/pyodide/browser.html
@@ -0,0 +1,422 @@
+
+
+
+
+
+ GFQL Pyodide live demo
+
+
+
+
+
+ GFQL in the browser
+ Pyodide loads a local PyGraphistry wheel, reads CSV edges, and runs GFQL without a Python install or backend.
+
+
+
+
+
Input
+
+ CSV edges
+
+
+
+
+ Min weight
+
+
+
+
+ Cypher
+
+
+
+ Run examples
+ Native GFQL
+ Cypher
+
+
+
+
+
Output
+
+ Ready
+
+
+
+
+
+
+
+
+
+
diff --git a/demos/gfql/pyodide/build-bundle.mjs b/demos/gfql/pyodide/build-bundle.mjs
new file mode 100644
index 0000000000..8b91700eca
--- /dev/null
+++ b/demos/gfql/pyodide/build-bundle.mjs
@@ -0,0 +1,383 @@
+import { copyFile, cp, mkdir, readdir, readFile, rm, stat, writeFile } from "node:fs/promises";
+import { spawnSync } from "node:child_process";
+import { createHash } from "node:crypto";
+import { basename, dirname, join, relative, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+ // Pinned Pyodide release; used for the npm package version and the CDN URL.
+ const PYODIDE_VERSION = "314.0.0";
+ const PYODIDE_CDN_URL = `https://cdn.jsdelivr.net/pyodide/v${PYODIDE_VERSION}/full/`;
+ // Accepted values for --flavor / GFQL_PYODIDE_BUNDLE_FLAVOR.
+ const BUNDLE_FLAVORS = new Set(["self-hosted", "cdn"]);
+ // Pyodide-distributed packages resolved (with dependencies) from pyodide-lock.json.
+ const PYODIDE_CORE_PACKAGES = [
+ "micropip",
+ "pandas",
+ ];
+ // Also resolved from the Pyodide lockfile, and additionally listed by name in
+ // the manifest's `requirements`.
+ const PYODIDE_REQUIREMENTS = [
+ "requests",
+ "packaging",
+ "typing-extensions",
+ ];
+ // Requirements fetched as wheels with `pip download` into the bundle's wheels/ directory.
+ const VENDORED_WHEEL_REQUIREMENTS = [
+ "lark>=1.1,<2",
+ ];
+
+ // Paths derived from this script's location (demos/gfql/pyodide inside the repo).
+ const scriptDir = dirname(fileURLToPath(import.meta.url));
+ const repoRoot = resolve(scriptDir, "../../..");
+ const defaultOutDir = "/tmp/pygraphistry-gfql-pyodide-bundle";
+ // Output directory selected by --docs-static.
+ const docsOutDir = join(repoRoot, "docs/source/static/gfql/pyodide");
+
+function parseArgs(argv) {
+ const options = {
+ flavor: process.env.GFQL_PYODIDE_BUNDLE_FLAVOR || "self-hosted",
+ outDir: undefined,
+ };
+ for (let i = 0; i < argv.length; i += 1) {
+ const arg = argv[i];
+ if (arg === "--docs-static") {
+ options.outDir = docsOutDir;
+ } else if (arg === "--flavor") {
+ i += 1;
+ options.flavor = argv[i];
+ } else if (arg.startsWith("--flavor=")) {
+ options.flavor = arg.slice("--flavor=".length);
+ } else if (arg === "--help" || arg === "-h") {
+ console.log([
+ "Usage: node demos/gfql/pyodide/build-bundle.mjs [out-dir] [--docs-static] [--flavor self-hosted|cdn]",
+ "",
+ "Flavors:",
+ " self-hosted Copy Pyodide runtime and required package wheels into the bundle.",
+ " cdn Keep only demo files and wheels; load Pyodide 314 from the pinned CDN.",
+ ].join("\n"));
+ process.exit(0);
+ } else if (arg.startsWith("--")) {
+ throw new Error(`Unknown option: ${arg}`);
+ } else if (!options.outDir) {
+ options.outDir = arg;
+ } else {
+ throw new Error(`Unexpected argument: ${arg}`);
+ }
+ }
+ if (!BUNDLE_FLAVORS.has(options.flavor)) {
+ throw new Error(`Unknown bundle flavor "${options.flavor}". Expected one of: ${[...BUNDLE_FLAVORS].join(", ")}`);
+ }
+ options.outDir = resolve(options.outDir || defaultOutDir);
+ return options;
+}
+
+ // Resolve the CLI options once at module load; every path below derives from outDir.
+ const buildOptions = parseArgs(process.argv.slice(2));
+ const outDir = buildOptions.outDir;
+ const bundleFlavor = buildOptions.flavor;
+ // Scratch area inside outDir; main() deletes it before measuring the bundle size.
+ const workDir = join(outDir, ".work");
+ const srcCopy = join(workDir, "src");
+ const pyodideNode = join(workDir, "node");
+ const wheelDir = join(outDir, "wheels");
+ // Only populated for the "self-hosted" flavor.
+ const pyodideOutDir = join(outDir, "pyodide");
+
+async function directorySize(path) {
+ const entry = await stat(path);
+ if (!entry.isDirectory()) {
+ return entry.size;
+ }
+ const children = await readdir(path);
+ let total = 0;
+ for (const child of children) {
+ total += await directorySize(join(path, child));
+ }
+ return total;
+}
+
+ // Recursively delete `path`; `force: true` makes a missing path a no-op.
+ async function removeIfExists(path) {
+ await rm(path, { recursive: true, force: true });
+ }
+
+async function prunePyodideRuntime(path) {
+ const removable = [
+ "console.html",
+ "console-v2.html",
+ "ffi.d.ts",
+ "package.json",
+ "pyodide.asm.mjs.map",
+ "pyodide.d.ts",
+ "pyodide.js",
+ "pyodide.js.map",
+ "pyodide.mjs.map",
+ "README.md",
+ ];
+ await Promise.all(removable.map((filename) => removeIfExists(join(path, filename))));
+}
+
+function sha256(bytes) {
+ return createHash("sha256").update(bytes).digest("hex");
+}
+
+function collectPyodidePackages(lockFile, packageNames) {
+ const packages = lockFile.packages || {};
+ const seen = new Set();
+
+ function visit(packageName) {
+ if (seen.has(packageName)) {
+ return;
+ }
+ const metadata = packages[packageName];
+ if (!metadata) {
+ throw new Error(`Pyodide lockfile does not include package: ${packageName}`);
+ }
+ seen.add(packageName);
+ for (const dependency of metadata.depends || []) {
+ visit(dependency);
+ }
+ }
+
+ for (const packageName of packageNames) {
+ visit(packageName);
+ }
+
+ return [...seen].sort();
+}
+
+async function fetchBytesWithRetry(url, attempts = 3) {
+ let lastError;
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
+ try {
+ const response = await fetch(url);
+ if (!response.ok) {
+ throw new Error(`${response.status} ${response.statusText}`);
+ }
+ return Buffer.from(await response.arrayBuffer());
+ } catch (error) {
+ lastError = error;
+ if (attempt < attempts) {
+ await new Promise((resolvePromise) => setTimeout(resolvePromise, attempt * 1000));
+ }
+ }
+ }
+ throw new Error(`Failed to download ${url}: ${lastError?.message || lastError}`);
+}
+
+ /**
+  * Download `url` to `outputPath` and return the downloaded bytes.
+  * Tries Node's fetch (with retries) first; if that or the file write fails,
+  * falls back to the `curl` binary and reads the file back from disk.
+  * Throws an Error carrying both failure messages when curl also fails.
+  */
+ async function downloadBytes(url, outputPath) {
+ try {
+ const bytes = await fetchBytesWithRetry(url);
+ await writeFile(outputPath, bytes);
+ return bytes;
+ } catch (fetchError) {
+ // Fallback: curl follows redirects (-L), fails on HTTP errors, and retries on its own.
+ const result = spawnSync("curl", [
+ "-L",
+ "--fail",
+ "--silent",
+ "--show-error",
+ "--retry", "3",
+ "--output", outputPath,
+ url,
+ ], {
+ encoding: "utf8",
+ stdio: "pipe",
+ });
+ if (result.status !== 0) {
+ throw new Error([
+ `Failed to download ${url}`,
+ `node fetch: ${fetchError?.message || fetchError}`,
+ `curl: ${result.stderr || result.stdout}`,
+ ].join("\n"));
+ }
+ return readFile(outputPath);
+ }
+ }
+
+async function downloadPyodidePackages(pyodideDir, packageNames) {
+ const lockFile = JSON.parse(await readFile(join(pyodideDir, "pyodide-lock.json"), "utf8"));
+ const packages = lockFile.packages || {};
+ const resolvedPackages = collectPyodidePackages(lockFile, packageNames);
+ const baseURL = `https://cdn.jsdelivr.net/pyodide/v${PYODIDE_VERSION}/full`;
+
+ for (const packageName of resolvedPackages) {
+ const metadata = packages[packageName];
+ const outputPath = join(pyodideDir, metadata.file_name);
+ let bytes;
+ try {
+ bytes = await readFile(outputPath);
+ } catch {
+ bytes = await downloadBytes(`${baseURL}/${metadata.file_name}`, outputPath);
+ }
+ if (sha256(bytes) !== metadata.sha256) {
+ throw new Error(`Checksum mismatch for ${metadata.file_name}`);
+ }
+ }
+
+ return resolvedPackages;
+}
+
+function run(command, args, options = {}) {
+ const result = spawnSync(command, args, {
+ cwd: options.cwd || repoRoot,
+ env: { ...process.env, ...options.env },
+ encoding: "utf8",
+ stdio: options.capture ? "pipe" : "inherit",
+ });
+ if (result.status !== 0) {
+ throw new Error(`${command} ${args.join(" ")} failed with exit ${result.status}`);
+ }
+ return result;
+}
+
+function sourceCopyExcludes() {
+ const excludes = [
+ ".git",
+ "plans",
+ "uv.lock",
+ "=2",
+ ];
+ const relativeOutDir = relative(repoRoot, outDir);
+ if (relativeOutDir && relativeOutDir !== "." && !relativeOutDir.startsWith("..")) {
+ excludes.push(relativeOutDir, `${relativeOutDir}/***`);
+ }
+ return excludes.flatMap((pattern) => ["--exclude", pattern]);
+}
+
+async function main() {
+ await rm(outDir, { recursive: true, force: true });
+ await mkdir(wheelDir, { recursive: true });
+ await mkdir(workDir, { recursive: true });
+
+ run("rsync", [
+ "-a",
+ ...sourceCopyExcludes(),
+ "./",
+ `${srcCopy}/`,
+ ]);
+
+ run("uv", [
+ "run",
+ "--no-project",
+ "--with", "build",
+ "python",
+ "-m", "build",
+ "--wheel",
+ "--outdir", wheelDir,
+ srcCopy,
+ ]);
+
+ run("uv", [
+ "run",
+ "--no-project",
+ "--with", "pip",
+ "python",
+ "-m", "pip",
+ "download",
+ "--only-binary=:all:",
+ "--dest", wheelDir,
+ ...VENDORED_WHEEL_REQUIREMENTS,
+ ]);
+
+ run("npm", [
+ "install",
+ "--prefix", pyodideNode,
+ "--no-audit",
+ "--no-fund",
+ "--ignore-scripts",
+ `pyodide@${PYODIDE_VERSION}`,
+ ]);
+ const pyodidePackageDir = join(pyodideNode, "node_modules/pyodide");
+ const vendoredPyodidePackages = await downloadPyodidePackages(pyodidePackageDir, [
+ ...PYODIDE_CORE_PACKAGES,
+ ...PYODIDE_REQUIREMENTS,
+ ]);
+
+ const graphistryWheel = run("bash", [
+ "-lc",
+ `ls ${JSON.stringify(wheelDir)}/graphistry-*.whl | head -1`,
+ ], { capture: true }).stdout.trim();
+ const requirementWheelPaths = run("bash", [
+ "-lc",
+ `find ${JSON.stringify(wheelDir)} -maxdepth 1 -name '*.whl' ! -name 'graphistry-*.whl' -print | sort`,
+ ], { capture: true }).stdout.trim().split("\n").filter(Boolean);
+
+ run("node", [
+ join(scriptDir, "run-node.mjs"),
+ graphistryWheel,
+ ], {
+ env: {
+ PYODIDE_MODULE: join(pyodideNode, "node_modules/pyodide/pyodide.mjs"),
+ GFQL_REQUIREMENT_WHEELS: requirementWheelPaths.join(":"),
+ },
+ });
+
+ if (bundleFlavor === "self-hosted") {
+ await cp(join(pyodideNode, "node_modules/pyodide"), pyodideOutDir, {
+ recursive: true,
+ });
+ await prunePyodideRuntime(pyodideOutDir);
+ }
+
+ for (const filename of ["benchmark-node.mjs", "browser.html", "edges.csv", "gfql.js", "package.json", "run-node.mjs", "test-browser.mjs"]) {
+ await copyFile(join(scriptDir, filename), join(outDir, filename));
+ }
+
+ const wheelFiles = run("bash", [
+ "-lc",
+ `find ${JSON.stringify(wheelDir)} -maxdepth 1 -name '*.whl' -printf '%f\\n' | sort`,
+ ], { capture: true }).stdout.trim().split("\n").filter(Boolean);
+ const graphistryWheelName = basename(graphistryWheel);
+ const requirementEntries = wheelFiles
+ .filter((filename) => filename !== graphistryWheelName)
+ .map((filename) => `./wheels/${filename}`);
+
+ await writeFile(join(outDir, "manifest.json"), `${JSON.stringify({
+ pyodideVersion: PYODIDE_VERSION,
+ flavor: bundleFlavor,
+ pyodideModule: bundleFlavor === "self-hosted" ? "./pyodide/pyodide.mjs" : `${PYODIDE_CDN_URL}pyodide.mjs`,
+ indexURL: bundleFlavor === "self-hosted" ? "./pyodide/" : PYODIDE_CDN_URL,
+ packageBaseUrl: bundleFlavor === "self-hosted" ? "./pyodide/" : PYODIDE_CDN_URL,
+ pyodidePackages: vendoredPyodidePackages,
+ graphistryWheel: `./wheels/${graphistryWheelName}`,
+ requirements: [
+ ...requirementEntries,
+ ...PYODIDE_REQUIREMENTS,
+ ],
+ }, null, 2)}\n`);
+
+ await writeFile(join(outDir, "README.txt"), [
+ "GFQL Pyodide bundle",
+ "",
+ `Flavor: ${bundleFlavor}`,
+ `Built from ${relative(process.cwd(), repoRoot) || "."}`,
+ "",
+ "Serve locally:",
+ ` cd ${outDir}`,
+ " python -m http.server 8000",
+ " open http://localhost:8000/browser.html",
+ "",
+ "Node smoke:",
+ bundleFlavor === "self-hosted"
+ ? ` PYODIDE_MODULE=${join(outDir, "pyodide/pyodide.mjs")} node ${join(outDir, "run-node.mjs")} ${join(outDir, "wheels", graphistryWheelName)}`
+ : " Build with --flavor self-hosted for an offline Node smoke target, or use the browser smoke.",
+ "",
+ "Browser smoke:",
+ ` node ${join(outDir, "test-browser.mjs")} ${outDir}`,
+ "",
+ "Benchmark:",
+ ` node ${join(outDir, "benchmark-node.mjs")} ${outDir}`,
+ "",
+ ].join("\n"));
+
+ await rm(workDir, { recursive: true, force: true });
+
+ const sizeReport = {
+ totalBytes: 0,
+ flavor: bundleFlavor,
+ pyodideBytes: bundleFlavor === "self-hosted" ? await directorySize(pyodideOutDir) : 0,
+ wheelsBytes: await directorySize(wheelDir),
+ generatedAt: new Date().toISOString(),
+ };
+ await writeFile(join(outDir, "size-report.json"), `${JSON.stringify(sizeReport, null, 2)}\n`);
+ sizeReport.totalBytes = await directorySize(outDir);
+ await writeFile(join(outDir, "size-report.json"), `${JSON.stringify(sizeReport, null, 2)}\n`);
+
+ const manifest = await readFile(join(outDir, "manifest.json"), "utf8");
+ console.log(`\nBundle written to ${outDir}`);
+ console.log(manifest);
+ console.log(JSON.stringify(sizeReport, null, 2));
+}
+
+main().catch((error) => {
+ console.error(error);
+ process.exitCode = 1;
+});
diff --git a/demos/gfql/pyodide/edges.csv b/demos/gfql/pyodide/edges.csv
new file mode 100644
index 0000000000..180d666965
--- /dev/null
+++ b/demos/gfql/pyodide/edges.csv
@@ -0,0 +1,4 @@
+src,dst,weight
+alice,bob,1
+bob,carol,2
+alice,carol,3
diff --git a/demos/gfql/pyodide/gfql.js b/demos/gfql/pyodide/gfql.js
new file mode 100644
index 0000000000..9602f800d0
--- /dev/null
+++ b/demos/gfql/pyodide/gfql.js
@@ -0,0 +1,238 @@
+ // CDN location of the Pyodide 314.0.0 "full" distribution, exported for callers.
+ export const DEFAULT_PYODIDE_INDEX_URL = "https://cdn.jsdelivr.net/pyodide/v314.0.0/full/";
+
+ // Packages loaded with pyodide.loadPackage() when the caller does not pass `pyodidePackages`.
+ const DEFAULT_PYODIDE_PACKAGES = [
+ "micropip",
+ "pandas",
+ "requests",
+ "packaging",
+ "typing-extensions",
+ ];
+ // Requirements installed through micropip when the caller does not pass `requirements`.
+ const DEFAULT_GRAPHISTRY_REQUIREMENTS = [
+ "lark>=1.1,<2",
+ ];
+
+function setGlobals(pyodide, values) {
+ for (const [key, value] of Object.entries(values)) {
+ pyodide.globals.set(key, value);
+ }
+}
+
+ /**
+  * Install the graphistry wheel into the Pyodide interpreter.
+  *
+  * `graphistryWheel` may be falsy (nothing is installed), a string handed to
+  * `micropip.install(..., deps=False)`, or an object `{ path?, data }` whose
+  * bytes are written to the Pyodide filesystem and unzipped into purelib.
+  */
+ async function installGraphistryWheel(pyodide, graphistryWheel) {
+ if (!graphistryWheel) {
+ return;
+ }
+
+ let wheelTarget = graphistryWheel;
+ if (typeof graphistryWheel !== "string") {
+ // Byte-mounted wheel: write it into the Pyodide FS, then extract it with
+ // zipfile after rejecting members with absolute paths or ".." segments.
+ wheelTarget = graphistryWheel.path || "/tmp/graphistry-pyodide.whl";
+ pyodide.FS.writeFile(wheelTarget, graphistryWheel.data);
+ setGlobals(pyodide, { _gfql_graphistry_wheel: wheelTarget });
+ await pyodide.runPythonAsync(`
+ import sysconfig
+ import zipfile
+ from pathlib import PurePosixPath
+
+ with zipfile.ZipFile(_gfql_graphistry_wheel) as _gfql_wheel:
+ for _gfql_member in _gfql_wheel.infolist():
+ _gfql_path = PurePosixPath(_gfql_member.filename)
+ if _gfql_path.is_absolute() or ".." in _gfql_path.parts:
+ raise ValueError(f"Unsafe wheel member path: {_gfql_member.filename}")
+ _gfql_wheel.extractall(sysconfig.get_paths()["purelib"])
+ `);
+ return;
+ }
+
+ // String wheel reference: let micropip install it without resolving dependencies.
+ setGlobals(pyodide, { _gfql_graphistry_wheel: wheelTarget });
+ await pyodide.runPythonAsync(`
+ import micropip
+ await micropip.install(_gfql_graphistry_wheel, deps=False)
+ `);
+ }
+
+function mountRequirementWheels(pyodide, requirements) {
+ return requirements.map((requirement, index) => {
+ if (typeof requirement === "string") {
+ return requirement;
+ }
+ const path = requirement.path || `/tmp/gfql-requirement-${index}.whl`;
+ pyodide.FS.writeFile(path, requirement.data);
+ return `emfs:${path}`;
+ });
+}
+
+async function retryAsync(fn, { attempts = 3, delayMs = 1000 } = {}) {
+ let lastError;
+ for (let attempt = 1; attempt <= attempts; attempt += 1) {
+ try {
+ return await fn();
+ } catch (error) {
+ lastError = error;
+ if (attempt < attempts) {
+ await new Promise((resolve) => setTimeout(resolve, delayMs * attempt));
+ }
+ }
+ }
+ throw lastError;
+}
+
+ /**
+  * Boot a Pyodide interpreter prepared for GFQL and wrap it in a GFQLRuntime.
+  *
+  * Steps: call `loadPyodide` (forwarding stdout/stderr and, when given,
+  * indexURL/packageBaseUrl), load `pyodidePackages` with retries, install
+  * `requirements` through micropip, install `graphistryWheel`, then define the
+  * Python helpers `GFQLMiniGraph` and `_gfql_graph` used by the runtime methods.
+  *
+  * @throws {Error} when `loadPyodide` is not provided.
+  * @returns {Promise<GFQLRuntime>}
+  */
+ export async function createGFQLRuntime({
+ loadPyodide,
+ indexURL,
+ packageBaseUrl,
+ pyodidePackages = DEFAULT_PYODIDE_PACKAGES,
+ requirements = DEFAULT_GRAPHISTRY_REQUIREMENTS,
+ graphistryWheel,
+ stdout,
+ stderr,
+ } = {}) {
+ if (!loadPyodide) {
+ throw new Error("createGFQLRuntime requires a loadPyodide function");
+ }
+
+ // Only forward the URL options when the caller supplied them.
+ const loadPyodideOptions = { stdout, stderr };
+ if (indexURL) {
+ loadPyodideOptions.indexURL = indexURL;
+ }
+ if (packageBaseUrl) {
+ loadPyodideOptions.packageBaseUrl = packageBaseUrl;
+ }
+
+ const pyodide = await loadPyodide(loadPyodideOptions);
+ await retryAsync(() => pyodide.loadPackage(pyodidePackages));
+
+ // Requirements are installed before the graphistry wheel.
+ setGlobals(pyodide, { _gfql_requirements: mountRequirementWheels(pyodide, requirements) });
+ await pyodide.runPythonAsync(`
+ import micropip
+ await micropip.install(_gfql_requirements)
+ `);
+ await installGraphistryWheel(pyodide, graphistryWheel);
+
+ // Define the Python-side graph class (a ComputeMixin subclass with bind/edges/nodes)
+ // and the _gfql_graph() factory that the GFQLRuntime methods call.
+ await pyodide.runPythonAsync(`
+ import copy
+ import pandas as pd
+ from graphistry.compute.ComputeMixin import ComputeMixin
+ from graphistry.compute import e, ge
+
+ class GFQLMiniGraph(ComputeMixin):
+ def __init__(self):
+ super().__init__()
+ self._edges = None
+ self._nodes = None
+ self._source = None
+ self._destination = None
+ self._node = None
+ self._edge = None
+
+ def bind(self, source=None, destination=None, node=None, edge=None, **kwargs):
+ out = copy.copy(self)
+ if source is not None:
+ out._source = source
+ if destination is not None:
+ out._destination = destination
+ if node is not None:
+ out._node = node
+ if edge is not None:
+ out._edge = edge
+ return out
+
+ def edges(self, edges, source=None, destination=None, edge=None, **kwargs):
+ if callable(edges):
+ edges = edges(self)
+ out = self.bind(source=source, destination=destination, edge=edge)
+ out._edges = edges
+ return out
+
+ def nodes(self, nodes, node=None, **kwargs):
+ if callable(nodes):
+ nodes = nodes(self)
+ out = self.bind(node=node)
+ out._nodes = nodes
+ return out
+
+ def _gfql_graph(edges, nodes, source, destination, node):
+ return GFQLMiniGraph().edges(edges, source, destination).nodes(nodes, node)
+ `);
+
+ return new GFQLRuntime(pyodide);
+ }
+
+ /**
+  * Thin JavaScript wrapper around a prepared Pyodide interpreter.
+  * Each method passes its inputs to Python as `_gfql_*` globals, runs a GFQL
+  * query over CSV edge data, and returns the result parsed from JSON as
+  * `{ edges: object[], nodes: object[] }`.
+  */
+ export class GFQLRuntime {
+ // `pyodide` is the interpreter the Python snippets below run in; they call
+ // `_gfql_graph`, which createGFQLRuntime defines.
+ constructor(pyodide) {
+ this.pyodide = pyodide;
+ }
+
+ /**
+  * Run the AST-form query `e(edge_match={weightColumn: ge(minWeight)})`.
+  * `csv` is CSV text whose `source`/`destination` columns name the edge
+  * endpoints; a nodes table with an `id` column is derived from those endpoints.
+  */
+ async runEdgeWeightAtLeast({
+ csv,
+ source = "src",
+ destination = "dst",
+ weightColumn = "weight",
+ minWeight = 2,
+ }) {
+ setGlobals(this.pyodide, {
+ _gfql_csv: csv,
+ _gfql_source: source,
+ _gfql_destination: destination,
+ _gfql_weight_column: weightColumn,
+ _gfql_min_weight: minWeight,
+ });
+
+ // The Python snippet's last expression (a JSON string) is the value returned to JS.
+ const jsonText = await this.pyodide.runPythonAsync(`
+ import io
+ import json
+ import pandas as pd
+ from graphistry.compute import e, ge
+
+ def _records(df):
+ if df is None:
+ return []
+ return json.loads(df.to_json(orient="records"))
+
+ _edges = pd.read_csv(io.StringIO(_gfql_csv))
+ _node_ids = pd.unique(_edges[[_gfql_source, _gfql_destination]].to_numpy().ravel())
+ _nodes = pd.DataFrame({"id": _node_ids})
+ _graph = _gfql_graph(_edges, _nodes, _gfql_source, _gfql_destination, "id")
+ _result = _graph.gfql([e(edge_match={_gfql_weight_column: ge(_gfql_min_weight)})])
+ json.dumps({
+ "edges": _records(getattr(_result, "_edges", None)),
+ "nodes": _records(getattr(_result, "_nodes", None)),
+ })
+ `);
+ return JSON.parse(jsonText);
+ }
+
+ /**
+  * Run a Cypher `query` string through `gfql(..., language="cypher")` over the
+  * same CSV-derived graph. NOTE(review): run-node.mjs in this change reads the
+  * RETURN rows from `nodes` - confirm that is the intended result shape.
+  */
+ async runCypherCsv({
+ csv,
+ query,
+ source = "src",
+ destination = "dst",
+ }) {
+ setGlobals(this.pyodide, {
+ _gfql_csv: csv,
+ _gfql_query: query,
+ _gfql_source: source,
+ _gfql_destination: destination,
+ });
+
+ const jsonText = await this.pyodide.runPythonAsync(`
+ import io
+ import json
+ import pandas as pd
+
+ def _records(df):
+ if df is None:
+ return []
+ return json.loads(df.to_json(orient="records"))
+
+ _edges = pd.read_csv(io.StringIO(_gfql_csv))
+ _node_ids = pd.unique(_edges[[_gfql_source, _gfql_destination]].to_numpy().ravel())
+ _nodes = pd.DataFrame({"id": _node_ids})
+ _graph = _gfql_graph(_edges, _nodes, _gfql_source, _gfql_destination, "id")
+ _result = _graph.gfql(_gfql_query, language="cypher")
+ json.dumps({
+ "edges": _records(getattr(_result, "_edges", None)),
+ "nodes": _records(getattr(_result, "_nodes", None)),
+ })
+ `);
+ return JSON.parse(jsonText);
+ }
+ }
diff --git a/demos/gfql/pyodide/package-lock.json b/demos/gfql/pyodide/package-lock.json
new file mode 100644
index 0000000000..3dc1973391
--- /dev/null
+++ b/demos/gfql/pyodide/package-lock.json
@@ -0,0 +1,59 @@
+{
+ "name": "pyodide",
+ "lockfileVersion": 3,
+ "requires": true,
+ "packages": {
+ "": {
+ "devDependencies": {
+ "playwright": "1.60.0"
+ }
+ },
+ "node_modules/fsevents": {
+ "version": "2.3.2",
+ "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
+ "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+ "dev": true,
+ "hasInstallScript": true,
+ "license": "MIT",
+ "optional": true,
+ "os": [
+ "darwin"
+ ],
+ "engines": {
+ "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+ }
+ },
+ "node_modules/playwright": {
+ "version": "1.60.0",
+ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.60.0.tgz",
+ "integrity": "sha512-hheHdokM8cdqCb0lcE3s+zT4t4W+vvjpGxsZlDnikarzx8tSzMebh3UiFtgqwFwnTnjYQcsyMF8ei2mCO/tpeA==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "dependencies": {
+ "playwright-core": "1.60.0"
+ },
+ "bin": {
+ "playwright": "cli.js"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "optionalDependencies": {
+ "fsevents": "2.3.2"
+ }
+ },
+ "node_modules/playwright-core": {
+ "version": "1.60.0",
+ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.60.0.tgz",
+ "integrity": "sha512-9bW6zvX/m0lEbgTKJ6YppOKx8H3VOPBMOCFh2irXFOT4BbHgrx5hPjwJYLT40Lu+4qtD36qKc/Hn56StUW57IA==",
+ "dev": true,
+ "license": "Apache-2.0",
+ "bin": {
+ "playwright-core": "cli.js"
+ },
+ "engines": {
+ "node": ">=18"
+ }
+ }
+ }
+}
diff --git a/demos/gfql/pyodide/package.json b/demos/gfql/pyodide/package.json
new file mode 100644
index 0000000000..3b00b8b6f2
--- /dev/null
+++ b/demos/gfql/pyodide/package.json
@@ -0,0 +1,12 @@
+{
+ "type": "module",
+ "scripts": {
+ "build": "node build-bundle.mjs",
+ "build:cdn": "node build-bundle.mjs --flavor cdn",
+ "build:self-hosted": "node build-bundle.mjs --flavor self-hosted",
+ "test:browser": "node test-browser.mjs"
+ },
+ "devDependencies": {
+ "playwright": "1.60.0"
+ }
+}
diff --git a/demos/gfql/pyodide/run-node.mjs b/demos/gfql/pyodide/run-node.mjs
new file mode 100644
index 0000000000..9ab106c582
--- /dev/null
+++ b/demos/gfql/pyodide/run-node.mjs
@@ -0,0 +1,54 @@
+import { readFile } from "node:fs/promises";
+import { basename, dirname, resolve } from "node:path";
+import { createGFQLRuntime } from "./gfql.js";
+
+const wheelPath = process.argv[2]; // required: path to the graphistry wheel to mount
+if (!wheelPath) {
+  throw new Error("Usage: node demos/gfql/pyodide/run-node.mjs <graphistry-wheel-path>");
+}
+// With PYODIDE_INDEX_URL unset, a custom PYODIDE_MODULE's directory is used as indexURL.
+const pyodideModule = process.env.PYODIDE_MODULE || "pyodide";
+const pyodideIndexURL = process.env.PYODIDE_INDEX_URL
+  || (pyodideModule === "pyodide" ? undefined : dirname(resolve(pyodideModule)));
+const { loadPyodide } = await import(pyodideModule);
+const csv = await readFile(new URL("./edges.csv", import.meta.url), "utf8");
+const wheelBytes = await readFile(resolve(wheelPath));
+const requirementWheelPaths = (process.env.GFQL_REQUIREMENT_WHEELS || "")
+  .split(":") // colon-separated list of extra wheel paths
+  .map((value) => value.trim())
+  .filter(Boolean);
+const requirements = await Promise.all(requirementWheelPaths.map(async (requirementPath) => ({
+  path: `/tmp/${basename(requirementPath)}`,
+  data: await readFile(resolve(requirementPath)),
+})));
+// Optional keys are left out entirely (rather than passed as undefined) when unset.
+const runtime = await createGFQLRuntime({
+  loadPyodide,
+  ...(pyodideIndexURL ? { indexURL: pyodideIndexURL } : {}),
+  ...(requirements.length > 0 ? { requirements } : {}),
+  graphistryWheel: {
+    path: `/tmp/${basename(wheelPath)}`,
+    data: wheelBytes,
+  },
+});
+// Native GFQL check: exactly two edges, each with weight >= 2.
+const astResult = await runtime.runEdgeWeightAtLeast({ csv, minWeight: 2 });
+if (astResult.edges.length !== 2) {
+  throw new Error(`Expected 2 AST GFQL edges, got ${astResult.edges.length}`);
+}
+if (!astResult.edges.every((edge) => edge.weight >= 2)) {
+  throw new Error(`Expected AST GFQL weights >= 2: ${JSON.stringify(astResult.edges)}`);
+}
+// Cypher check: the MATCH query over the same CSV must also yield two rows.
+const cypherResult = await runtime.runCypherCsv({
+  csv,
+  query: "MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e",
+});
+if (cypherResult.nodes.length !== 2) {
+  throw new Error(`Expected 2 Cypher rows, got ${cypherResult.nodes.length}`);
+}
+// Print both result sets as pretty-printed JSON.
+console.log(JSON.stringify({
+  astEdges: astResult.edges,
+  cypherRows: cypherResult.nodes,
+}, null, 2));
diff --git a/demos/gfql/pyodide/test-browser.mjs b/demos/gfql/pyodide/test-browser.mjs
new file mode 100644
index 0000000000..41c36ba375
--- /dev/null
+++ b/demos/gfql/pyodide/test-browser.mjs
@@ -0,0 +1,167 @@
+import { createServer } from "node:http";
+import { readFile, stat } from "node:fs/promises";
+import { extname, normalize, resolve, sep } from "node:path";
+import { pathToFileURL } from "node:url";
+
+const bundleDir = resolve(process.argv[2] || "/tmp/pygraphistry-gfql-pyodide-bundle"); // directory served over HTTP
+const timeoutMs = Number(process.env.GFQL_BROWSER_TIMEOUT_MS || "120000"); // passed to page.setDefaultTimeout()
+const screenshotPath = process.env.GFQL_BROWSER_SCREENSHOT; // optional; a screenshot is written only when set
+// Content-Type per file extension; unlisted extensions are served as application/octet-stream.
+const contentTypes = new Map([
+ [".csv", "text/csv; charset=utf-8"],
+ [".html", "text/html; charset=utf-8"],
+ [".js", "text/javascript; charset=utf-8"],
+ [".json", "application/json; charset=utf-8"],
+ [".mjs", "text/javascript; charset=utf-8"],
+ [".txt", "text/plain; charset=utf-8"],
+ [".wasm", "application/wasm"],
+ [".whl", "application/octet-stream"],
+ [".zip", "application/zip"],
+]);
+
+// Resolves to true only when `path` is an existing regular file.
+// Any stat() failure (missing path, permission error, ...) yields false
+// instead of rejecting, so callers can use the result as a plain boolean.
+async function fileExists(path) {
+  const info = await stat(path).catch(() => null);
+  return info !== null && info.isFile();
+}
+
+// Maps a request URL path to a file path inside bundleDir ("/" -> browser.html).
+// Returns undefined when the resolved path would fall outside bundleDir.
+function resolveRequestPath(urlPath) {
+  const [rawPath] = urlPath.split("?");
+  const pathname = decodeURIComponent(rawPath);
+  const target = resolve(bundleDir, normalize(pathname === "/" ? "browser.html" : pathname.slice(1)));
+  const insideBundle = target === bundleDir || target.startsWith(`${bundleDir}${sep}`);
+  return insideBundle ? target : undefined;
+}
+
+// Serves files from bundleDir on an ephemeral 127.0.0.1 port.
+// Resolves to { server, baseURL } once the server is listening.
+async function startServer() {
+  const plainText = { "content-type": "text/plain; charset=utf-8" };
+  const handleRequest = async (request, response) => {
+    try {
+      const filePath = resolveRequestPath(request.url || "/");
+      const servable = Boolean(filePath) && (await fileExists(filePath));
+      if (!servable) {
+        response.writeHead(404, plainText);
+        response.end("not found");
+        return;
+      }
+      const payload = await readFile(filePath);
+      const mimeType = contentTypes.get(extname(filePath)) || "application/octet-stream";
+      response.writeHead(200, { "content-length": payload.length, "content-type": mimeType });
+      if (request.method === "HEAD") {
+        response.end(); // headers only
+      } else {
+        response.end(payload);
+      }
+    } catch (error) {
+      // Answer with the error text (stack when available) instead of leaving the request open.
+      response.writeHead(500, plainText);
+      response.end(String(error.stack || error.message || error));
+    }
+  };
+  const server = createServer(handleRequest);
+  await new Promise((onListening) => server.listen(0, "127.0.0.1", onListening));
+  const { port } = server.address();
+  return { server, baseURL: `http://127.0.0.1:${port}` };
+}
+
+// Imports Playwright from the first candidate that loads: PLAYWRIGHT_MODULE, the bare
+// package name, then node_modules under the bundle dir, the demo dir, and the cwd.
+async function importPlaywright() {
+  const specifiers = [
+    process.env.PLAYWRIGHT_MODULE,
+    "playwright",
+    ...[
+      resolve(bundleDir, "node_modules/playwright/index.mjs"),
+      resolve(process.cwd(), "demos/gfql/pyodide/node_modules/playwright/index.mjs"),
+      resolve(process.cwd(), "node_modules/playwright/index.mjs"),
+    ].map((entry) => pathToFileURL(entry).href),
+  ].filter(Boolean);
+  const failures = [];
+  for (const specifier of specifiers) {
+    // A file:// candidate that is missing on disk is skipped without recording an error.
+    const missing = specifier.startsWith("file://") && !(await fileExists(new URL(specifier)));
+    if (missing) continue;
+    try {
+      return await import(specifier);
+    } catch (error) {
+      failures.push(`${specifier}: ${error.message}`);
+    }
+  }
+  const hint = "Run `npm install --prefix demos/gfql/pyodide` or set PLAYWRIGHT_MODULE.";
+  const headline = "Playwright is not installed or could not be imported.";
+  throw new Error([headline, hint, ...failures].join("\n"));
+}
+
+// Browser smoke test: serves the bundle, opens browser.html in headless Chromium, waits for
+// the auto-run JSON, then checks status, metrics, edge weights and console/page errors.
+async function main() {
+  // The demo query (weight >= 2 over edges.csv) is documented to return 2 edges / 2 rows.
+  const expectedRows = 2;
+  const { chromium } = await importPlaywright();
+  const { server, baseURL } = await startServer();
+  const pageErrors = [];
+  let browser;
+  try {
+    // Launch inside try: if it fails, the still-listening server must be closed too.
+    browser = await chromium.launch({ headless: true, args: ["--no-sandbox"] });
+    const page = await browser.newPage();
+    page.setDefaultTimeout(timeoutMs);
+    page.on("pageerror", (error) => pageErrors.push(error.message));
+    page.on("console", (message) => {
+      if (message.type() === "error") pageErrors.push(message.text());
+    });
+    await page.goto(`${baseURL}/browser.html`, { waitUntil: "domcontentloaded" });
+    // The predicate runs in the page, so the expected count is passed in as an argument.
+    await page.waitForFunction((rowCount) => {
+      const raw = document.querySelector("#jsonOutput")?.textContent || "";
+      try {
+        const parsed = JSON.parse(raw);
+        return parsed.astEdges?.length === rowCount && parsed.cypherRows?.length === rowCount;
+      } catch {
+        return false; // output not written yet, or not valid JSON so far
+      }
+    }, expectedRows);
+    const result = await page.evaluate(() => JSON.parse(document.querySelector("#jsonOutput").textContent));
+    const status = await page.textContent("#status");
+    const metrics = await page.$$eval("#metrics tbody tr", (rows) => rows.map((row) => row.textContent.trim()));
+    if (!status.includes("Ready")) {
+      throw new Error(`Expected Ready status, got: ${status}`);
+    }
+    if (!metrics.some((text) => text.includes("Create Pyodide GFQL runtime"))) {
+      throw new Error(`Expected runtime creation metric, got: ${JSON.stringify(metrics)}`);
+    }
+    if (!result.astEdges.every((edge) => edge.weight >= 2)) {
+      throw new Error(`Expected AST edge weights >= 2, got: ${JSON.stringify(result.astEdges)}`);
+    }
+    if (pageErrors.length > 0) {
+      throw new Error(`Browser console/page errors:\n${pageErrors.join("\n")}`);
+    }
+    if (screenshotPath) {
+      await page.screenshot({ path: screenshotPath, fullPage: true });
+    }
+    console.log(JSON.stringify({
+      ok: true,
+      url: `${baseURL}/browser.html`,
+      astEdges: result.astEdges.length,
+      cypherRows: result.cypherRows.length,
+      metrics,
+    }, null, 2));
+  } finally { // runs on success and failure alike
+    try {
+      await browser?.close(); // browser is undefined when the launch itself failed
+    } finally {
+      await new Promise((resolvePromise) => server.close(resolvePromise));
+    }
+  }
+}
+
+main().catch((error) => {
+  console.error(error);
+  process.exitCode = 1; // sets a failing exit status without calling process.exit()
+});
diff --git a/docs/source/gfql/index.rst b/docs/source/gfql/index.rst
index 8362feb6f7..c6547406dd 100644
--- a/docs/source/gfql/index.rst
+++ b/docs/source/gfql/index.rst
@@ -56,6 +56,7 @@ See also:
combo
quick
cypher
+ pyodide
where
return
predicates/quick
diff --git a/docs/source/gfql/pyodide.rst b/docs/source/gfql/pyodide.rst
new file mode 100644
index 0000000000..ccf5cc280b
--- /dev/null
+++ b/docs/source/gfql/pyodide.rst
@@ -0,0 +1,293 @@
+GFQL in Pyodide
+================
+
+This page shows how to run a small GFQL workload in JavaScript with
+`Pyodide <https://pyodide.org/>`__.
+
+Why Pyodide 314?
+----------------
+
+Pyodide 314 aligns Pyodide versioning with Python 3.14, ships as a native ES
+module runtime, and standardizes package publishing around PyEmscripten wheels.
+That makes it a good baseline for a browser-side GFQL proof: the JavaScript
+entrypoint can import Pyodide as an ES module, load Pyodide-native packages
+such as ``pandas`` and ``pyarrow``, and install pure Python wheels at runtime.
+
+For this GFQL demo, the important runtime pieces are:
+
+- Pyodide packages: ``micropip``, ``pandas``, ``requests``, ``packaging``, and
+ ``typing-extensions``.
+- Pure Python wheels: ``lark`` for the local Cypher parser.
+- A pure Python wheel for this repository.
+
+The browser demo intentionally avoids the top-level ``graphistry.edges(...)``
+constructor and uses a small in-Pyodide graph object backed by
+``ComputeMixin``. That keeps the live demo on a pandas-only path. ``pyarrow``
+is still useful for normal PyGraphistry upload and Arrow workflows, but it is
+not loaded for this CSV/GFQL browser proof.
+
+Build the bundle
+----------------
+
+From the repository root:
+
+.. code-block:: bash
+
+ node demos/gfql/pyodide/build-bundle.mjs /tmp/pygraphistry-gfql-pyodide-bundle
+
+Choose a bundle flavor:
+
+- ``self-hosted`` copies Pyodide, Python stdlib, and required Pyodide wheels
+ into the bundle. It is the most reproducible/offline option.
+- ``cdn`` publishes only the demo files plus Graphistry/``lark`` wheels and
+ loads the pinned Pyodide 314 runtime/packages from jsDelivr. It keeps the
+ hosted artifact small, but first cold load still downloads Pyodide and
+ ``pandas`` from the CDN.
+
+.. code-block:: bash
+
+ node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn
+ node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-self-hosted --flavor self-hosted
+
+For Read the Docs, build the small CDN-backed flavor into the Sphinx static tree
+before the HTML build:
+
+.. code-block:: bash
+
+ node demos/gfql/pyodide/build-bundle.mjs --docs-static --flavor cdn
+
+This writes the live demo under ``docs/source/static/gfql/pyodide/``. The
+directory is ignored by git because it contains generated wheels and static
+assets; publish it as a generated docs artifact, not as checked-in source.
+The Read the Docs build runs this command before the HTML build so the live
+page is included in the published ``_static`` directory.
+
+The builder:
+
+1. Copies the repo to a temporary build directory.
+2. Builds a pure Python ``graphistry`` wheel.
+3. Downloads wheels that Pyodide does not ship directly, currently ``lark``.
+4. Installs ``pyodide@314.0.0``, resolves the required Pyodide package closure
+ from the Pyodide lockfile, and verifies checksums for downloaded wheels.
+5. Writes a static bundle containing ``pyodide/``, ``gfql.js``, ``edges.csv``,
+ ``browser.html``, ``manifest.json``, ``size-report.json``, and wheels under
+ ``wheels/``. The ``cdn`` flavor omits ``pyodide/`` and points the manifest
+ at the pinned CDN runtime instead.
+6. Prunes non-runtime Pyodide files such as source maps, legacy console pages,
+ and TypeScript declarations.
+
+Try it live
+-----------
+
+When the generated static bundle is published with the docs, open the live
+browser demo:
+
+.. raw:: html
+
+   <p>
+     <a href="../_static/gfql/pyodide/browser.html">
+       Try GFQL in your browser
+     </a>
+   </p>
+
+The live page runs entirely in the browser: it loads Pyodide, installs the
+bundled Graphistry wheel and small pure-Python dependencies, reads the sample
+CSV, then executes both a native GFQL predicate query and a Cypher-style
+``MATCH`` query. The page includes editable CSV, editable Cypher, rendered
+tables, raw JSON, and per-step timing.
+
+If the link returns 404 in a local docs build, generate the static bundle first
+with ``node demos/gfql/pyodide/build-bundle.mjs --docs-static`` and rebuild the
+HTML docs.
+
+Run the browser tutorial
+------------------------
+
+Serve the generated directory as static files:
+
+.. code-block:: bash
+
+ cd /tmp/pygraphistry-gfql-pyodide-bundle
+ python -m http.server 8000
+
+Then open:
+
+.. code-block:: text
+
+ http://localhost:8000/browser.html
+
+The page loads ``pyodide/pyodide.mjs``, installs the local wheels listed in
+``manifest.json``, loads ``edges.csv``, and runs:
+
+.. code-block:: javascript
+
+ const astResult = await runtime.runEdgeWeightAtLeast({ csv, minWeight: 2 });
+ const cypherResult = await runtime.runCypherCsv({
+ csv,
+ query: "MATCH (a)-[e]->(b) WHERE e.weight >= 2 RETURN e",
+ });
+
+The expected output is:
+
+.. code-block:: json
+
+ {
+ "astEdges": [
+ {"src": "bob", "dst": "carol", "weight": 2},
+ {"src": "alice", "dst": "carol", "weight": 3}
+ ],
+ "cypherRows": [
+ {"e": "[{weight: 2}]"},
+ {"e": "[{weight: 3}]"}
+ ]
+ }
+
+Run the Node smoke
+------------------
+
+The same bundle can be checked in Node:
+
+.. code-block:: bash
+
+ PYODIDE_MODULE=/tmp/pygraphistry-gfql-pyodide-bundle/pyodide/pyodide.mjs \
+ node /tmp/pygraphistry-gfql-pyodide-bundle/run-node.mjs \
+ /tmp/pygraphistry-gfql-pyodide-bundle/wheels/graphistry-0+unknown-py3-none-any.whl
+
+Benchmark and size report
+-------------------------
+
+The builder writes ``size-report.json``. Recent local builds of this demo
+reported:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Flavor
+ - Bundle part
+ - Approximate bytes
+ - Approximate size
+ * - ``cdn``
+ - Total static bundle
+ - 975,000
+ - 0.9 MiB
+ * - ``cdn``
+ - Pyodide runtime and package cache
+ - 0
+ - CDN-backed
+ * - ``cdn``
+ - Graphistry and ``lark`` wheels
+ - 943,000
+ - 0.9 MiB
+ * - ``self-hosted``
+ - Total static bundle
+ - 23,126,000
+ - 22.0 MiB
+ * - ``self-hosted``
+ - Pyodide runtime and package cache
+ - 22,151,080
+ - 21.1 MiB
+ * - ``self-hosted``
+ - Graphistry and ``lark`` wheels
+ - 943,000
+ - 0.9 MiB
+
+Run the benchmark:
+
+.. code-block:: bash
+
+ node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \
+ /tmp/pygraphistry-gfql-pyodide-bundle
+
+Set ``GFQL_BENCH_SIZES`` and ``GFQL_BENCH_REPEAT`` to change the workload:
+
+.. code-block:: bash
+
+ GFQL_BENCH_SIZES=10,1000,10000 GFQL_BENCH_REPEAT=3 \
+ node /tmp/pygraphistry-gfql-pyodide-bundle/benchmark-node.mjs \
+ /tmp/pygraphistry-gfql-pyodide-bundle
+
+Example local Node timings after one warmup run:
+
+.. list-table::
+ :header-rows: 1
+
+ * - Edges
+ - Native GFQL median
+ - Cypher median
+ - Returned rows
+ * - 10
+ - 43.0 ms
+ - 95.9 ms
+ - 4
+ * - 1,000
+ - 51.2 ms
+ - 105.7 ms
+ - 400
+ * - 10,000
+ - 138.2 ms
+ - 179.4 ms
+ - 4,000
+
+In the same run, creating the Pyodide GFQL runtime took about 6.82 seconds.
+Browser numbers will vary with network, cache state, CPU, and whether the
+server compresses static assets.
+
+Run the browser smoke
+---------------------
+
+The browser smoke starts a local static server, opens ``browser.html`` in
+Chromium, waits for the auto-run output, and checks that both native GFQL and
+Cypher return the expected rows:
+
+.. code-block:: bash
+
+ npm install --prefix demos/gfql/pyodide --no-audit --no-fund
+ npm exec --prefix demos/gfql/pyodide -- playwright install chromium
+ node demos/gfql/pyodide/build-bundle.mjs /tmp/gfql-cdn --flavor cdn
+ node demos/gfql/pyodide/test-browser.mjs /tmp/gfql-cdn
+
+CI runs the same smoke against the ``cdn`` flavor. A recent cold CDN browser
+run took about 46.6 seconds to create the Pyodide runtime, then 78 ms for the
+native GFQL query and 487 ms for the Cypher query. Warm browser/cache behavior
+should be faster; keep release numbers benchmark-driven.
+
+Hosting and versioning
+----------------------
+
+The generated RTD demo uses the ``cdn`` flavor under
+``_static/gfql/pyodide/``. That keeps the published docs artifact small while
+pinning all Pyodide URLs to ``v314.0.0``.
+
+For standalone apps, two patterns are reasonable:
+
+- **Self-hosted bundle**: publish the generated directory with the app or docs.
+ This is the most reproducible option and works offline after the first page
+ load if the browser cache keeps the assets.
+- **Pinned CDN runtime**: load Pyodide from
+ ``https://cdn.jsdelivr.net/pyodide/v314.0.0/full/`` and host only
+ ``browser.html``, ``gfql.js``, the Graphistry wheel, ``lark``, and the
+  manifest. Pyodide's docs list this versioned jsDelivr URL as the cached
+ release CDN path.
+
+In either mode, keep ``manifest.json`` pinned to the Pyodide version and the
+Graphistry wheel built for the release. Static hosts must serve ``.wasm`` with
+the WebAssembly MIME type; Python's local ``http.server`` and many static hosts
+do this correctly.
+
+Implementation notes
+--------------------
+
+- ``gfql.js`` accepts a browser URL wheel or byte-mounted wheel data. URL
+ wheels use ``micropip.install(url, deps=False)``. Byte-mounted local wheels
+ are extracted into Pyodide ``purelib`` after validating wheel member paths.
+- ``gfql.js`` also accepts byte-mounted dependency wheels. In Node, those are
+ installed through Pyodide's ``emfs:`` wheel URI support.
+- The bundle manifest points ``lark`` at a local wheel and records the resolved
+ Pyodide package closure. The ``self-hosted`` flavor serves those wheels from
+ ``pyodide/``; the ``cdn`` flavor serves them from the pinned Pyodide CDN.
+- The demo binds a small ``id`` nodes table derived from CSV endpoints before
+ running GFQL. This avoids pandas 3.0 concat edge cases in Pyodide when
+ Graphistry has to synthesize nodes.
+- The Cypher example intentionally uses ``RETURN e`` because broader
+ multi-alias Cypher projection is outside the currently supported local GFQL
+ compiler subset.
diff --git a/docs/source/static/gfql/.gitignore b/docs/source/static/gfql/.gitignore
new file mode 100644
index 0000000000..dfbd8300e8
--- /dev/null
+++ b/docs/source/static/gfql/.gitignore
@@ -0,0 +1 @@
+/pyodide/
diff --git a/graphistry/ArrowFileUploader.py b/graphistry/ArrowFileUploader.py
index 55c1af01cf..748304d822 100644
--- a/graphistry/ArrowFileUploader.py
+++ b/graphistry/ArrowFileUploader.py
@@ -1,9 +1,16 @@
+from __future__ import annotations
+
import sys, threading, hashlib
from typing import Any, Optional, Dict, Tuple
-import pyarrow as pa
-import pyarrow.ipc as pa_ipc
import requests
+try:
+ import pyarrow as pa
+ import pyarrow.ipc as pa_ipc
+except ImportError:
+ pa = None
+ pa_ipc = None
+
from graphistry.utils.requests import log_requests_error
from graphistry.otel import inject_trace_headers
from .util import setup_logger
diff --git a/graphistry/Engine.py b/graphistry/Engine.py
index bc39199c49..e97015c999 100644
--- a/graphistry/Engine.py
+++ b/graphistry/Engine.py
@@ -2,11 +2,19 @@
import warnings
import numpy as np
import pandas as pd
-import pyarrow as pa
from typing import Any, List, Optional, Union
from typing_extensions import Literal
from enum import Enum
+try:
+ import pyarrow as pa
+except ImportError:
+ pa = None
+
+
+def _is_pyarrow_table(value: Any) -> bool:  # always False when pyarrow is not importable
+    return False if pa is None else isinstance(value, pa.Table)
+
class Engine(Enum):
PANDAS = 'pandas'
@@ -72,7 +80,7 @@ def resolve_engine(
return Engine.PANDAS
# Arrow and Spark are input formats, not compute engines — coerce to pandas at call sites
- if isinstance(g_or_df, pa.Table):
+ if _is_pyarrow_table(g_or_df):
return Engine.PANDAS
try:
@@ -167,7 +175,7 @@ def df_to_engine(df, engine: Engine):
if engine == Engine.PANDAS:
if isinstance(df, pd.DataFrame):
return df
- if isinstance(df, pa.Table):
+ if _is_pyarrow_table(df):
return df.to_pandas()
type_module = str(type(df).__module__)
if 'pyspark' in type_module:
diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py
index c512b734e0..9367390a13 100644
--- a/graphistry/PlotterBase.py
+++ b/graphistry/PlotterBase.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
from graphistry.Plottable import Plottable, RenderModes, RenderModesConcrete
from typing import Any, Callable, Dict, List, Optional, Union, Tuple, cast, overload, TYPE_CHECKING
from typing_extensions import Literal
@@ -10,10 +12,15 @@
from graphistry.plugins_types.hypergraph import HypergraphResult
from graphistry.render.resolve_render_mode import resolve_render_mode
from graphistry.Engine import Engine, EngineAbstractType, df_to_engine
-import copy, hashlib, numpy as np, pandas as pd, pyarrow as pa, sys, uuid, warnings
+import copy, hashlib, numpy as np, pandas as pd, sys, uuid, warnings
from functools import lru_cache, partialmethod
from weakref import WeakValueDictionary
+try:
+ import pyarrow as pa
+except ImportError:
+ pa = None
+
from graphistry.privacy import Privacy, Mode, ModeAction
from graphistry.client_session import (
ClientSession,
@@ -3018,7 +3025,7 @@ def make_arrow_upload(edges: Any, upload_nodes: Any) -> ArrowUploader:
return g._make_arrow_dataset(edges=edges_arr, nodes=nodes_arr, name=name, description=description, metadata=metadata)
if isinstance(graph, pd.DataFrame) \
- or isinstance(graph, pa.Table) \
+ or (pa is not None and isinstance(graph, pa.Table)) \
or ( not (maybe_cudf() is None) and isinstance(graph, maybe_cudf().DataFrame) ) \
or ( not (maybe_dask_cudf() is None) and isinstance(graph, maybe_dask_cudf().DataFrame) ) \
or ( not (maybe_dask_dataframe() is None) and isinstance(graph, maybe_dask_dataframe().DataFrame) ) \
diff --git a/graphistry/arrow_uploader.py b/graphistry/arrow_uploader.py
index 91cb6b5f63..6e7deb3f9c 100644
--- a/graphistry/arrow_uploader.py
+++ b/graphistry/arrow_uploader.py
@@ -1,6 +1,13 @@
+from __future__ import annotations
+
from typing import List, Optional, Dict, Any
-import base64, io, json, pyarrow as pa, requests, sys
+import base64, io, json, requests, sys
+
+try:
+ import pyarrow as pa
+except ImportError:
+ pa = None
from graphistry.privacy import Mode, Privacy, ModeAction
from graphistry.otel import inject_trace_headers