DaveZheng
diff --git a/‎package-lock.json‎
Lines changed: 2 additions & 1 deletion b/‎package-lock.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/device.test.ts‎
Lines changed: 45 additions & 1 deletion b/‎src/device.test.ts‎
Lines changed: 45 additions & 1 deletion
diff --git a/‎src/device.ts‎
Lines changed: 37 additions & 1 deletion b/‎src/device.ts‎
Lines changed: 37 additions & 1 deletion
diff --git a/‎src/index.ts‎
Lines changed: 22 additions & 2 deletions b/‎src/index.ts‎
Lines changed: 22 additions & 2 deletions
@@ -1,6 +1,6 @@
 import { describe, it } from "node:test";
 import assert from "node:assert";
-import { getDeviceInfo, recommendModel } from "./device.js";
+import { getDeviceInfo, recommendModel, getAvailableMemoryGB, lookupModelSize, MODEL_TIERS } from "./device.js";
 
 describe("getDeviceInfo", () => {
   it("returns chip and totalMemoryGB on macOS", async () => {
@@ -41,3 +41,47 @@ describe("recommendModel", () => {
     assert.strictEqual(rec.quantization, "8bit");
   });
 });
+
+describe("getAvailableMemoryGB", () => {
+  // Only sanity bounds are asserted: the real value depends on the machine
+  // that runs the test, so an exact figure cannot be pinned.
+  it("returns a positive number on macOS", async () => {
+    const availableGB = await getAvailableMemoryGB();
+    const isNumber = typeof availableGB === "number";
+    assert.ok(isNumber, "should return a number");
+    assert.ok(0 < availableGB, `should be positive, got ${availableGB}`);
+    assert.ok(512 > availableGB, `should be reasonable (< 512GB), got ${availableGB}`);
+  });
+});
+
+describe("lookupModelSize", () => {
+  // Model ids this suite expects to resolve, paired with the size in GB
+  // that the lookup should report for each.
+  const knownModels = [
+    ["returns size for a known model", "mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", 4],
+    ["returns size for another known model", "mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", 8],
+  ] as const;
+
+  for (const [title, modelId, expectedGB] of knownModels) {
+    it(title, () => {
+      assert.strictEqual(lookupModelSize(modelId), expectedGB);
+    });
+  }
+
+  // Ids that must not resolve to any size.
+  const unknownModels = [
+    ["returns undefined for unknown models", "some/custom-model"],
+    ["returns undefined for empty string", ""],
+  ] as const;
+
+  for (const [title, modelId] of unknownModels) {
+    it(title, () => {
+      assert.strictEqual(lookupModelSize(modelId), undefined);
+    });
+  }
+});
+
+describe("MODEL_TIERS", () => {
+  // Guards the export itself: it must be a populated array.
+  it("is exported and non-empty", () => {
+    const isArray = Array.isArray(MODEL_TIERS);
+    assert.ok(isArray);
+    assert.ok(0 < MODEL_TIERS.length);
+  });
+
+  // Each entry needs a string id and a strictly positive numeric size.
+  it("every tier has required fields", () => {
+    MODEL_TIERS.forEach(({ modelId, estimatedSizeGB }) => {
+      assert.ok(typeof modelId === "string");
+      assert.ok(typeof estimatedSizeGB === "number");
+      assert.ok(0 < estimatedSizeGB);
+    });
+  });
+});
@@ -25,7 +25,7 @@ interface ModelTier {
 
 // Actual sizes verified from HuggingFace (2026-02-06)
 // Budget = total RAM × 0.75 (reserve 25% for OS + apps)
-const MODEL_TIERS: ModelTier[] = [
+export const MODEL_TIERS: ModelTier[] = [
   {
     minRAM: 128,
     modelId: "mlx-community/Qwen3-Coder-Next-8bit",
@@ -82,3 +82,39 @@ export function recommendModel(totalMemoryGB: number): ModelRecommendation {
   }
   return MODEL_TIERS[MODEL_TIERS.length - 1];
 }
+
+/**
+ * Estimate how much memory is currently available, in GB (1024³ bytes).
+ *
+ * Runs macOS `vm_stat` and sums the free, inactive, purgeable and speculative
+ * page counts, multiplied by the page size announced in the header line. This
+ * is more accurate than `os.freemem()` on macOS (which only reports "free"
+ * pages).
+ *
+ * The figure is advisory, so an unusable `vm_stat` is not treated as fatal:
+ * when the command cannot be run, or its output contains none of the expected
+ * counters, the function falls back to `os.freemem()` rather than rejecting
+ * or reporting a misleading 0.
+ *
+ * @returns Estimated available memory in GB.
+ */
+export async function getAvailableMemoryGB(): Promise<number> {
+  const bytesPerGB = 1024 ** 3;
+  // Imported locally so the fallback does not rely on the file's import list.
+  const { freemem } = await import("node:os");
+
+  let stdout: string;
+  try {
+    ({ stdout } = await execFileAsync("vm_stat"));
+  } catch {
+    // vm_stat is missing or exited with an error: use the coarser OS figure.
+    return freemem() / bytesPerGB;
+  }
+
+  // First line: "Mach Virtual Memory Statistics: (page size of 16384 bytes)"
+  const pageSizeMatch = stdout.match(/page size of (\d+) bytes/);
+  const pageSize = pageSizeMatch ? parseInt(pageSizeMatch[1], 10) : 16384;
+
+  const counters = [
+    "Pages free",
+    "Pages inactive",
+    "Pages purgeable",
+    "Pages speculative",
+  ];
+
+  let pages = 0;
+  let matched = 0;
+  for (const label of counters) {
+    const m = stdout.match(new RegExp(`${label}:\\s+(\\d+)`));
+    if (m) {
+      pages += parseInt(m[1], 10);
+      matched += 1;
+    }
+  }
+
+  // No counter recognised means the output format was not understood; a sum
+  // of 0 would wrongly claim that no memory is available at all.
+  if (matched === 0) {
+    return freemem() / bytesPerGB;
+  }
+
+  return (pages * pageSize) / bytesPerGB;
+}
+
+/**
+ * Resolve a model id to the estimated size (in GB) recorded in MODEL_TIERS.
+ *
+ * @param modelId Model identifier to search for; compared by exact equality.
+ * @returns The `estimatedSizeGB` of the first tier whose `modelId` matches,
+ *   or `undefined` when no tier matches (custom/unknown models).
+ */
+export function lookupModelSize(modelId: string): number | undefined {
+  for (const candidate of MODEL_TIERS) {
+    if (candidate.modelId === modelId) {
+      return candidate.estimatedSizeGB;
+    }
+  }
+  return undefined;
+}
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 import http from "node:http";
 import { loadConfig, saveConfig } from "./config.js";
-import { getDeviceInfo, recommendModel } from "./device.js";
+import { getDeviceInfo, recommendModel, getAvailableMemoryGB, lookupModelSize } from "./device.js";
 import { ensureDependencies, ensureServer, stopServer } from "./server.js";
 import { startProxy, setShuttingDown } from "./proxy.js";
 import { execFileSync, spawn } from "node:child_process";
@@ -85,7 +85,27 @@ async function main(): Promise<void> {
   }
 
   // Ensure python + mlx-lm are available (creates venv on first run)
-  ensureDependencies();
+  await ensureDependencies();
+
+  // Memory check before loading model
+  const modelSizeGB = lookupModelSize(config.model);
+  if (modelSizeGB !== undefined) {
+    const availableGB = await getAvailableMemoryGB();
+    if (availableGB < modelSizeGB) {
+      const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
+      const answer = await new Promise<string>((resolve) =>
+        rl.question(
+          `Warning: ${availableGB.toFixed(1)}GB free memory, model needs ~${modelSizeGB}GB. Continue anyway? (Y/n) `,
+          resolve,
+        ),
+      );
+      rl.close();
+      if (answer.toLowerCase() === "n") {
+        console.log("Aborted.");
+        return;
+      }
+    }
+  }
 
   // Ensure mlx-lm.server is running
   await ensureServer(config.model, config.serverPort);