Skip to content

Commit 653ac33

Browse files
author
alvinttang
committed
feat: npm distribution, MCP safety guardrails, one-click onboarding prompt
npm/: npx cortex-memory — downloads prebuilt binary from GitHub Releases, supports darwin/linux × arm64/x64, zero dependencies. MCP guardrails (tools.rs): - MAX_INGEST_TEXT_BYTES=100KB, MAX_BATCH_SIZE=100, MAX_SEARCH_LIMIT=100 - MAX_CONTEXT_TOKENS=8000, MAX_TAG_SCAN_PER_TIER=10000 - Clamp compress params (min_messages≥1, max_age_days≥1) - Block self-merge in person_merge QUICK_START_PROMPT.md: copy-paste prompt for Claude Code that installs Cortex, stores demo memories, and demos search/context/facts/beliefs.
1 parent b52bad2 commit 653ac33

6 files changed

Lines changed: 291 additions & 3 deletions

File tree

QUICK_START_PROMPT.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Cortex Quick Start Prompt
2+
3+
Copy the prompt below and paste it into Claude Code to automatically install Cortex and demo its memory features in one shot.
4+
5+
---
6+
7+
```
8+
Install Cortex memory engine and configure it as my MCP server, then demo the key features.
9+
10+
## Step 1: Install
11+
Run: curl -fsSL https://raw.githubusercontent.com/gambletan/cortex/main/install.sh | bash -s -- --ide claude
12+
13+
## Step 2: Verify
14+
Run: cortex-mcp-server info
15+
Run: cortex-mcp-server ~/.cortex/memory.db stats
16+
17+
## Step 3: Demo — store some memories
18+
Use the memory_ingest tool to store these:
19+
- "I'm a software engineer working at Google"
20+
- "I live in Shanghai and speak Chinese and English"
21+
- "I prefer Rust over C++ for systems programming"
22+
- "Met with Sarah from Stripe about payment integration last Tuesday"
23+
24+
## Step 4: Demo — query
25+
- Search for "Sarah" and show what comes back
26+
- Run memory_context to show the AI-ready context summary
27+
- Query facts about "User" to show extracted knowledge
28+
- List beliefs to show Bayesian inference
29+
- Show stats
30+
31+
## Step 5: Summary
32+
Print a summary of what was set up and the capabilities now available.
33+
```

cortex-mcp-server/src/tools.rs

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ use serde_json::{json, Value};
55
use std::sync::Arc;
66
use uuid::Uuid;
77

8+
// ── Safety guardrails ───────────────────────────────────────────────────────
/// Upper bound on a single ingested memory's text, in bytes (100 KB).
const MAX_INGEST_TEXT_BYTES: usize = 100_000;
/// Maximum number of items accepted by `memory_ingest_batch`.
const MAX_BATCH_SIZE: usize = 100;
/// Hard cap applied to the `limit` argument of `memory_search`.
const MAX_SEARCH_LIMIT: usize = 100;
/// Hard cap applied to the `max_tokens` argument of `memory_context`.
const MAX_CONTEXT_TOKENS: usize = 8_000;
/// Per-tier row-scan ceiling used by `tag_list_taxonomy`.
const MAX_TAG_SCAN_PER_TIER: usize = 10_000;
14+
815
/// Return the list of available tools (MCP tool schema format).
916
/// Includes built-in tools and any plugin-registered tools.
1017
pub fn list_tools_with_plugins(cortex: &Arc<Cortex>) -> Value {
@@ -580,6 +587,9 @@ fn get_embedding(args: &Value) -> Option<Vec<f32>> {
580587

581588
fn tool_memory_ingest(cortex: &Arc<Cortex>, args: &Value) -> Result<String, String> {
582589
let text = get_str(args, "text").ok_or("missing 'text'")?;
590+
if text.len() > MAX_INGEST_TEXT_BYTES {
591+
return Err(format!("text too large: {} bytes (max {})", text.len(), MAX_INGEST_TEXT_BYTES));
592+
}
583593
let channel = get_str(args, "channel").ok_or("missing 'channel'")?;
584594
let user_id = get_str(args, "user_id");
585595
let salience = args.get("salience").and_then(|v| v.as_f64()).map(|v| v as f32);
@@ -629,7 +639,7 @@ fn tool_memory_consolidate(cortex: &Arc<Cortex>) -> Result<String, String> {
629639

630640
fn tool_memory_search(cortex: &Arc<Cortex>, args: &Value) -> Result<String, String> {
631641
let query = get_str(args, "query").ok_or("missing 'query'")?;
632-
let limit = get_usize(args, "limit", 10);
642+
let limit = get_usize(args, "limit", 10).min(MAX_SEARCH_LIMIT);
633643
let channel = get_str(args, "channel");
634644
let person_id = get_str(args, "person_id")
635645
.and_then(|s| Uuid::parse_str(s).ok());
@@ -662,7 +672,7 @@ fn tool_memory_search(cortex: &Arc<Cortex>, args: &Value) -> Result<String, Stri
662672
}
663673

664674
fn tool_memory_context(cortex: &Arc<Cortex>, args: &Value) -> Result<String, String> {
665-
let max_tokens = get_usize(args, "max_tokens", 2000);
675+
let max_tokens = get_usize(args, "max_tokens", 2000).min(MAX_CONTEXT_TOKENS);
666676
let channel = get_str(args, "channel");
667677
let person_id = get_str(args, "person_id")
668678
.and_then(|s| Uuid::parse_str(s).ok());
@@ -852,6 +862,8 @@ fn tool_memory_compress(cortex: &Arc<Cortex>, args: &Value) -> Result<String, St
852862
.get("max_age_days")
853863
.and_then(|v| v.as_i64())
854864
.unwrap_or(7);
865+
let min_messages = min_messages.max(1); // prevent compressing everything
866+
let max_age_days = max_age_days.max(1); // prevent compressing fresh memories
855867

856868
let report = cortex
857869
.run_compression(min_messages, max_age_days)
@@ -1001,6 +1013,9 @@ fn tool_memory_ingest_batch(cortex: &Arc<Cortex>, args: &Value) -> Result<String
10011013
let items_arr = args.get("items")
10021014
.and_then(|v| v.as_array())
10031015
.ok_or("missing 'items' array")?;
1016+
if items_arr.len() > MAX_BATCH_SIZE {
1017+
return Err(format!("batch too large: {} items (max {})", items_arr.len(), MAX_BATCH_SIZE));
1018+
}
10041019

10051020
let items: Vec<cortex_core::types::BatchIngestItem> = items_arr.iter().map(|item| {
10061021
cortex_core::types::BatchIngestItem {
@@ -1035,7 +1050,7 @@ fn tool_tag_list_taxonomy(cortex: &Arc<Cortex>) -> Result<String, String> {
10351050
];
10361051

10371052
for tier in &tiers {
1038-
if let Ok(mems) = cortex.storage().list_by_tier(*tier, 100_000) {
1053+
if let Ok(mems) = cortex.storage().list_by_tier(*tier, MAX_TAG_SCAN_PER_TIER) {
10391054
for mem in mems {
10401055
for tag in &mem.tags {
10411056
*tag_counts.entry(tag.clone()).or_insert(0) += 1;
@@ -1103,6 +1118,9 @@ fn tool_person_merge(cortex: &Arc<Cortex>, args: &Value) -> Result<String, Strin
11031118
let source_str = get_str(args, "source_id").ok_or("missing 'source_id'")?;
11041119
let target_id = Uuid::parse_str(target_str).map_err(|e| format!("Invalid target UUID: {}", e))?;
11051120
let source_id = Uuid::parse_str(source_str).map_err(|e| format!("Invalid source UUID: {}", e))?;
1121+
if target_id == source_id {
1122+
return Err("cannot merge a person with themselves".to_string());
1123+
}
11061124

11071125
let merged = cortex.merge_people(target_id, source_id).map_err(|e| e.to_string())?;
11081126

npm/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# cortex-memory
2+
3+
Private local memory engine for AI agents. Zero cloud, sub-ms latency, 3.8 MB binary. Provides persistent, structured memory via the Model Context Protocol (MCP).
4+
5+
## Quick start
6+
7+
```bash
8+
npx cortex-memory
9+
```
10+
11+
## Install globally
12+
13+
```bash
14+
npm install -g cortex-memory
15+
```
16+
17+
Then run anywhere:
18+
19+
```bash
20+
cortex-memory
21+
```
22+
23+
## What is Cortex?
24+
25+
Cortex is a Rust-powered MCP server that gives AI coding agents long-term memory: facts, preferences, relationships, and beliefs — all stored locally on your machine with no cloud dependency.
26+
27+
## More info
28+
29+
Full documentation and source: [github.com/gambletan/cortex](https://github.com/gambletan/cortex)

npm/bin/install.js

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#!/usr/bin/env node
2+
"use strict";
3+
4+
const https = require("https");
5+
const fs = require("fs");
6+
const path = require("path");
7+
const os = require("os");
8+
const { execSync } = require("child_process");
9+
10+
const REPO = "gambletan/cortex";
11+
const BINARY_NAME = "cortex-mcp-server";
12+
const BIN_DIR = path.join(__dirname);
13+
const BINARY_PATH = path.join(BIN_DIR, BINARY_NAME);
14+
15+
function getPlatformSuffix() {
16+
const platform = os.platform();
17+
const arch = os.arch();
18+
19+
const platformMap = {
20+
darwin: "darwin",
21+
linux: "linux",
22+
};
23+
24+
const archMap = {
25+
arm64: "arm64",
26+
x64: "x86_64",
27+
};
28+
29+
const osSuffix = platformMap[platform];
30+
const archSuffix = archMap[arch];
31+
32+
if (!osSuffix) {
33+
throw new Error(
34+
`Unsupported platform: ${platform}. Only darwin and linux are supported.`
35+
);
36+
}
37+
if (!archSuffix) {
38+
throw new Error(
39+
`Unsupported architecture: ${arch}. Only arm64 and x64 are supported.`
40+
);
41+
}
42+
43+
return `${osSuffix}-${archSuffix}`;
44+
}
45+
46+
/**
 * GET a URL over HTTPS and resolve with the full response body as a Buffer.
 * Follows 3xx redirects via the Location header.
 *
 * Fix: the original recursed on redirects with no bound, so a redirect
 * loop (A -> B -> A) would recurse until the process died. A defaulted
 * `redirectsLeft` parameter caps the chain; existing single-argument
 * callers are unaffected.
 *
 * @param {string} url - URL to fetch.
 * @param {number} [redirectsLeft=5] - remaining redirects before rejecting.
 * @returns {Promise<Buffer>} response body on HTTP 200; rejects on any
 *   other terminal status, network error, or a 60 s timeout.
 */
function httpsGet(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const request = https.get(url, { headers: { "User-Agent": "cortex-memory-npm" } }, (res) => {
      if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
        // Bound the redirect chain so a misconfigured server cannot
        // recurse forever.
        if (redirectsLeft <= 0) {
          reject(new Error(`Too many redirects fetching ${url}`));
          return;
        }
        httpsGet(res.headers.location, redirectsLeft - 1).then(resolve, reject);
        return;
      }
      if (res.statusCode !== 200) {
        reject(new Error(`HTTP ${res.statusCode} fetching ${url}`));
        return;
      }
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      res.on("end", () => resolve(Buffer.concat(chunks)));
      res.on("error", reject);
    });
    request.on("error", reject);
    request.setTimeout(60000, () => {
      request.destroy();
      reject(new Error(`Request timed out: ${url}`));
    });
  });
}
69+
70+
/**
 * Fetch metadata for the latest GitHub release of the Cortex repository.
 *
 * @returns {Promise<object>} parsed release JSON (tag_name, assets, …).
 */
async function fetchLatestRelease() {
  const body = await httpsGet(`https://api.github.com/repos/${REPO}/releases/latest`);
  return JSON.parse(body.toString());
}
75+
76+
/**
 * Resolve the correct release asset for this machine and install it.
 * When the full binary is absent for the detected platform, falls back
 * to the "lite" variant (linux-arm64 only publishes lite).
 *
 * @throws {Error} when neither the full nor the lite asset exists.
 */
async function install() {
  const suffix = getPlatformSuffix();
  const assetName = `${BINARY_NAME}-${suffix}.tar.gz`;

  console.log(`[cortex-memory] Detected platform: ${suffix}`);
  console.log(`[cortex-memory] Fetching latest release from ${REPO}...`);

  const release = await fetchLatestRelease();
  const findAsset = (name) => release.assets.find((a) => a.name === name);

  const fullAsset = findAsset(assetName);
  if (fullAsset) {
    console.log(`[cortex-memory] Downloading ${assetName} (${release.tag_name})...`);
    await downloadAndExtract(fullAsset.browser_download_url);
    return;
  }

  // Try lite variant as fallback (linux-arm64 only has lite)
  const liteAssetName = `${BINARY_NAME}-lite-${suffix}.tar.gz`;
  const liteAsset = findAsset(liteAssetName);

  if (!liteAsset) {
    const available = release.assets.map((a) => a.name).join(", ");
    throw new Error(
      `No binary found for ${suffix}.\n` +
      `Looked for: ${assetName} or ${liteAssetName}\n` +
      `Available: ${available}\n` +
      `Release: ${release.tag_name}`
    );
  }

  console.log(`[cortex-memory] Full binary not available for ${suffix}, using lite variant.`);
  console.log(`[cortex-memory] Downloading ${liteAssetName} (${release.tag_name})...`);
  await downloadAndExtract(liteAsset.browser_download_url);
}
110+
111+
/**
 * Download a release tarball, unpack it in a temporary directory, and
 * copy the cortex binary into this package's bin directory with exec
 * permissions. The temp directory is always cleaned up, even on failure.
 *
 * @param {string} downloadUrl - browser_download_url of the release asset.
 * @throws {Error} when the archive does not contain the expected binary.
 */
async function downloadAndExtract(downloadUrl) {
  const archive = await httpsGet(downloadUrl);

  const workDir = fs.mkdtempSync(path.join(os.tmpdir(), "cortex-"));
  const archivePath = path.join(workDir, "binary.tar.gz");

  try {
    fs.writeFileSync(archivePath, archive);
    // Both quoted paths come from mkdtempSync, so this shell command
    // never sees attacker-controlled characters.
    execSync(`tar xzf "${archivePath}" -C "${workDir}"`, { stdio: "pipe" });

    const unpacked = path.join(workDir, BINARY_NAME);
    if (!fs.existsSync(unpacked)) {
      throw new Error(
        `Binary not found after extraction. Expected: ${BINARY_NAME} in archive.`
      );
    }

    fs.copyFileSync(unpacked, BINARY_PATH);
    fs.chmodSync(BINARY_PATH, 0o755); // downloaded files are not executable by default

    console.log(`[cortex-memory] Installed ${BINARY_NAME} to ${BINARY_PATH}`);
  } finally {
    fs.rmSync(workDir, { recursive: true, force: true });
  }
}
136+
137+
// Entry point: kick off the download, and on any failure point the user
// at the manual release page before failing the npm lifecycle script.
install().catch((err) => {
  console.error(`[cortex-memory] Installation failed: ${err.message}`);
  console.error(
    "[cortex-memory] You can manually download the binary from:"
  );
  console.error(` https://github.com/${REPO}/releases/latest`);
  process.exit(1);
});

npm/bin/run.js

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env node
2+
"use strict";
3+
4+
const path = require("path");
5+
const fs = require("fs");
6+
const { spawn } = require("child_process");
7+
8+
const BINARY_NAME = "cortex-mcp-server";
9+
const BINARY_PATH = path.join(__dirname, BINARY_NAME);
10+
11+
if (!fs.existsSync(BINARY_PATH)) {
12+
console.error(
13+
`[cortex-memory] Binary not found at ${BINARY_PATH}\n` +
14+
`\n` +
15+
`The postinstall script may have failed. Try reinstalling:\n` +
16+
` npm install -g cortex-memory\n` +
17+
`\n` +
18+
`Or download the binary manually from:\n` +
19+
` https://github.com/gambletan/cortex/releases/latest`
20+
);
21+
process.exit(1);
22+
}
23+
24+
const args = process.argv.slice(2);
25+
26+
const child = spawn(BINARY_PATH, args, {
27+
stdio: "inherit",
28+
});
29+
30+
child.on("error", (err) => {
31+
console.error(`[cortex-memory] Failed to start ${BINARY_NAME}: ${err.message}`);
32+
process.exit(1);
33+
});
34+
35+
child.on("exit", (code, signal) => {
36+
if (signal) {
37+
process.kill(process.pid, signal);
38+
} else {
39+
process.exit(code ?? 1);
40+
}
41+
});

npm/package.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"name": "cortex-memory",
3+
"version": "2.0.0",
4+
"description": "Private local memory engine for AI agents — zero cloud, sub-ms latency, 3.8MB",
5+
"bin": {
6+
"cortex-memory": "./bin/run.js"
7+
},
8+
"scripts": {
9+
"postinstall": "node ./bin/install.js"
10+
},
11+
"keywords": ["ai", "memory", "mcp", "llm", "local", "privacy", "rust"],
12+
"author": "gambletan",
13+
"license": "MIT",
14+
"repository": {
15+
"type": "git",
16+
"url": "https://github.com/gambletan/cortex"
17+
},
18+
"os": ["darwin", "linux"],
19+
"cpu": ["x64", "arm64"],
20+
"engines": {
21+
"node": ">=16"
22+
}
23+
}

0 commit comments

Comments
 (0)