From c003e4affdf0044145a8521d28535c0736744293 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E2=80=9Cmkczarkowski=E2=80=9D?= <marcinczarkowski@onet.eu>
Date: Tue, 21 Oct 2025 16:43:21 +0200
Subject: [PATCH] feat: optimize cloudflare DO usage

---
 mcp-server/.ai/mcp-optimization-plan.md | 538 ++++++++++++++++++++++++
 mcp-server/package-lock.json            |   8 +-
 mcp-server/src/index.ts                 | 143 ++++++-
 3 files changed, 683 insertions(+), 6 deletions(-)
 create mode 100644 mcp-server/.ai/mcp-optimization-plan.md
diff --git a/mcp-server/.ai/mcp-optimization-plan.md b/mcp-server/.ai/mcp-optimization-plan.md
new file mode 100644
index 0000000..22aa241
--- /dev/null
+++ b/mcp-server/.ai/mcp-optimization-plan.md
@@ -0,0 +1,538 @@
+# MCP Server Durable Objects Optimization Plan
+
+## Problem Statement
+
+Account has used 90% of the daily Cloudflare Durable Objects free tier limit of 100,000 rows_written.
+
+**Traffic Stats (Last Month):**
+- Total Requests: 503.62k
+- Success Rate: 68.56% (345.29k)
+- Error Rate: 31.44% (158.33k)
+- Actual Users: 10-500 active users
+
+**Daily Average:** ~16,700 requests/day
+**Expected Writes:** ~50,000+ writes/day
+
+---
+
+## Root Causes Analysis (UltraThink)
+
+### 1. Architecture Issue: New Durable Object Per Request ⚠️
+
+**Current Behavior:**
+```typescript
+// In agents/dist/mcp/index.js (lines 366-378)
+const sessionId = url.searchParams.get("sessionId") || namespace.newUniqueId().toString();
+const id = namespace.idFromName(`sse:${sessionId}`);
+const doStub = namespace.get(id);
+await doStub._init(ctx.props);
+```
+
+**Problem:**
+- Each GET request to `/sse` without a sessionId generates a NEW unique ID
+- Each unique sessionId creates a NEW Durable Object instance
+- Each new DO performs multiple storage writes during initialization
+
+**Storage Writes Per New DO:**
+```typescript
+// From agents library - McpAgent._init() and related methods:
+1. await this.ctx.storage.put("props", props ?? {});           // Line 200
+2. await this.ctx.storage.put("transportType", "unset");       // Line 202
+3. await this.ctx.storage.put("transportType", "sse");         // Line 234
+4. await this.ctx.storage.put("initialized", true);            // Line 211
+```
+
+**The Math:**
+- 16,700 requests/day
+- If 50% are new sessions = 8,350 new DOs
+- Each DO = 3-4 storage writes
+- **Total: 25,000-33,000 writes/day from initialization alone**
+- Plus state updates, reconnections, retries
+- **Result: ~90,000 writes/day (90% of limit!)**
+
+### 2. Bot Traffic (31.44% Error Rate) 🤖
+
+The high error rate (158.33k errors) indicates:
+
+**Likely Sources:**
+- Web crawlers (Google, Bing, etc.) probing `/sse` endpoint
+- Security scanners checking for vulnerabilities
+- Monitoring tools performing health checks
+- AI agent clients with poor retry logic
+- Malformed requests from automated tools
+
+**Impact:**
+- Each bot request creates a new DO (wasted writes)
+- Failed requests may retry multiple times
+- Bots don't reuse sessionIds
+- No authentication = public endpoint abuse
+
+### 3. No Protection Layers 🛡️
+
+**Currently Missing:**
+- ❌ No rate limiting
+- ❌ No bot detection/blocking
+- ❌ No request authentication
+- ❌ No session caching/reuse hints
+- ❌ No dedicated health check endpoint
+- ❌ No write optimization (always writes even if value unchanged)
+
+---
+
+## Solutions (Priority Order)
+
+### Phase 1: IMMEDIATE FIXES (Reduce writes by 60-80%)
+
+#### 1.1 Add Worker-Level Rate Limiting
+**Location:** `mcp-server/src/index.ts`
+
+Block requests BEFORE Durable Object creation:
+
+```typescript
+// Add rate limiting using in-memory cache or CF Rate Limiting API
+const rateLimiter = {
+  async check(ip: string): Promise<boolean> {
+    // Implement per-IP rate limiting: 10 requests/minute
+    // Use Workers KV or in-memory Map with TTL
+  }
+};
+
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    const ip = request.headers.get('CF-Connecting-IP') || 'unknown';
+
+    // Rate limit check
+    if (!(await rateLimiter.check(ip))) {
+      return new Response('Rate limit exceeded', { status: 429 });
+    }
+
+    // ... rest of handler
+  }
+};
+```
+
+**Expected Impact:** Reduce bot traffic by 40-50%
+
+#### 1.2 Add Health Check Endpoint
+**Location:** `mcp-server/src/index.ts`
+
+Redirect monitoring bots away from DO-heavy routes:
+
+```typescript
+export default {
+  fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    const url = new URL(request.url);
+
+    // Lightweight health check (no DO creation)
+    if (url.pathname === "/health" || url.pathname === "/") {
+      return new Response(JSON.stringify({
+        status: "ok",
+        version: "1.0.0"
+      }), {
+        headers: { "Content-Type": "application/json" }
+      });
+    }
+
+    // ... rest of handler
+  }
+};
+```
+
+**Expected Impact:** Reduce unnecessary DO creations by 20-30%
+
+#### 1.3 Implement Lazy Storage Writes
+**Location:** `agents` library behavior (workaround in wrapper)
+
+Only write to storage when values actually change:
+
+```typescript
+// Wrapper around DO to check before writing
+async _init(props) {
+  const existingProps = await this.ctx.storage.get("props");
+  if (JSON.stringify(existingProps) !== JSON.stringify(props)) {
+    await this.ctx.storage.put("props", props ?? {});
+  }
+
+  const existingTransportType = await this.ctx.storage.get("transportType");
+  if (!existingTransportType) {
+    await this.ctx.storage.put("transportType", "unset");
+  }
+
+  // ... rest of init
+}
+```
+
+**Expected Impact:** Reduce duplicate writes by 30-40%
+
+#### 1.4 Implement Session Reuse Strategy
+**Location:** Client-side + documentation
+
+Encourage clients to reuse sessionIds:
+
+```typescript
+// Return sessionId in response headers
+return new Response(readable, {
+  headers: {
+    "Content-Type": "text/event-stream",
+    "X-Session-ID": sessionId,  // Client can reuse this
+    "Cache-Control": "no-cache",
+    // ...
+  }
+});
+```
+
+Update README.md to document session reuse:
+```markdown
+### Best Practices
+- **Reuse Session IDs:** Save the sessionId from the initial connection
+- **Reconnect Logic:** Use ?sessionId=<saved-id> for reconnections
+- **Reduces Server Load:** Reusing sessions prevents unnecessary DO creation
+```
+
+**Expected Impact:** Reduce new DO creation by 50-70% (for legitimate clients)
+
+---
+
+### Phase 2: SHORT-TERM IMPROVEMENTS (Additional 10-20% reduction)
+
+#### 2.1 Enable Cloudflare Bot Management
+**Location:** `mcp-server/src/index.ts`
+
+Use Cloudflare's bot detection:
+
+```typescript
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    // Check bot score (0-99, lower = more likely bot)
+    const botScore = request.cf?.botManagement?.score || 0;
+
+    // Block obvious bots (score < 30)
+    if (botScore < 30) {
+      return new Response('Forbidden', { status: 403 });
+    }
+
+    // Challenge suspicious traffic (score 30-50)
+    if (botScore < 50) {
+      // Require Turnstile challenge or API key
+    }
+
+    // ... rest of handler
+  }
+};
+```
+
+**Note:** Requires Cloudflare Bot Management (available on Pro+ plans)
+
+**Expected Impact:** Block 60-70% of bot traffic
+
+#### 2.2 Add Request Validation
+**Location:** `mcp-server/src/index.ts`
+
+Reject invalid requests early:
+
+```typescript
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    const url = new URL(request.url);
+
+    // Validate MCP endpoints
+    if (url.pathname === "/sse") {
+      // Must accept text/event-stream
+      const accept = request.headers.get("Accept") || "";
+      if (!accept.includes("text/event-stream")) {
+        return new Response('Invalid Accept header', { status: 406 });
+      }
+
+      // Check User-Agent (block empty or suspicious)
+      const userAgent = request.headers.get("User-Agent") || "";
+      if (!userAgent || userAgent.length < 5) {
+        return new Response('Invalid User-Agent', { status: 400 });
+      }
+    }
+
+    // ... rest of handler
+  }
+};
+```
+
+**Expected Impact:** Reduce malformed requests by 20-30%
+
+#### 2.3 Add Optional Authentication
+**Location:** `mcp-server/src/index.ts`
+
+Protect against unauthorized usage:
+
+```typescript
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    // Optional: Require API key for public endpoint
+    const apiKey = request.headers.get("X-API-Key");
+
+    if (env.REQUIRE_AUTH && !apiKey) {
+      return new Response('Unauthorized', { status: 401 });
+    }
+
+    if (apiKey && apiKey !== env.MCP_API_KEY) {
+      return new Response('Invalid API key', { status: 403 });
+    }
+
+    // ... rest of handler
+  }
+};
+```
+
+**Expected Impact:** Control access, prevent abuse
+
+---
+
+### Phase 3: LONG-TERM OPTIMIZATIONS (Architectural improvements)
+
+#### 3.1 Use Cloudflare KV for Session Metadata
+**Location:** New KV namespace + `mcp-server/src/index.ts`
+
+Store session metadata in KV (cheaper, no write limits):
+
+```typescript
+// wrangler.jsonc - Add KV binding
+{
+  "kv_namespaces": [
+    { "binding": "MCP_SESSIONS", "id": "..." }
+  ]
+}
+
+// src/index.ts
+export default {
+  async fetch(request: Request, env: Env, ctx: ExecutionContext) {
+    const sessionId = url.searchParams.get("sessionId");
+
+    if (sessionId) {
+      // Check if session exists in KV first
+      const session = await env.MCP_SESSIONS.get(sessionId);
+      if (!session) {
+        return new Response('Session not found', { status: 404 });
+      }
+    } else {
+      // Create new session in KV
+      const newSessionId = crypto.randomUUID();
+      await env.MCP_SESSIONS.put(newSessionId, JSON.stringify({
+        created: Date.now(),
+        lastAccess: Date.now()
+      }), { expirationTtl: 3600 }); // 1 hour TTL
+    }
+
+    // Only create DO for active connections
+    // ... rest of handler
+  }
+};
+```
+
+**Expected Impact:**
+- Reduce DO storage writes by 80%
+- KV writes are free (up to 1000/day on free tier, then $0.50/million)
+
+#### 3.2 Enable Smart Placement
+**Location:** `mcp-server/wrangler.jsonc`
+
+Uncomment Smart Placement:
+
+```jsonc
+{
+  "placement": { "mode": "smart" }
+}
+```
+
+**Expected Impact:**
+- Better resource utilization
+- Reduced latency
+- Potential cost savings
+
+#### 3.3 Add Comprehensive Observability
+**Location:** `mcp-server/src/index.ts`
+
+Track metrics to monitor optimization effectiveness:
+
+```typescript
+// Log key metrics
+ctx.waitUntil(
+  env.ANALYTICS?.writeDataPoint({
+    blobs: [url.pathname, request.cf?.country || 'unknown'],
+    doubles: [botScore, responseTime],
+    indexes: [sessionId ? 'existing' : 'new']
+  })
+);
+```
+
+**Metrics to Track:**
+- Requests per endpoint
+- SessionId reuse rate
+- Bot score distribution
+- Error rates by cause
+- DO creation rate
+- Storage write count
+
+#### 3.4 Implement DO Hibernation Optimization
+**Location:** Already enabled in agents library
+
+Ensure hibernation is working properly:
+- DOs should hibernate when idle
+- Reduces memory usage
+- Automatic wake-up on new requests
+
+**Verify in code:** `Agent.options = { hibernate: true }` (already set)
+
+---
+
+## Implementation Timeline
+
+### Week 1: Quick Wins (Phase 1)
+**Day 1-2:**
+- [ ] Add health check endpoint
+- [ ] Add request validation
+- [ ] Document session reuse
+
+**Day 3-5:**
+- [ ] Implement rate limiting
+- [ ] Add lazy storage writes
+- [ ] Test and deploy
+
+**Expected Results:** 60-80% reduction (18k-36k writes/day)
+
+### Week 2: Improvements (Phase 2)
+**Day 1-3:**
+- [ ] Evaluate Cloudflare Bot Management (may require plan upgrade)
+- [ ] Add bot filtering logic
+- [ ] Implement optional authentication
+
+**Day 4-5:**
+- [ ] Test bot filtering
+- [ ] Monitor metrics
+- [ ] Adjust thresholds
+
+**Expected Results:** 80-90% reduction (9k-18k writes/day)
+
+### Week 3-4: Architecture (Phase 3)
+**Week 3:**
+- [ ] Add KV namespace
+- [ ] Migrate session metadata to KV
+- [ ] Implement session lifecycle
+
+**Week 4:**
+- [ ] Add comprehensive observability
+- [ ] Enable Smart Placement
+- [ ] Performance testing
+
+**Expected Results:** 95%+ reduction (<5k writes/day)
+
+---
+
+## Expected Outcomes
+
+### Immediate (Week 1)
+- **Writes Reduction:** 60-80%
+- **Daily Writes:** 18,000-36,000 (18-36% of limit)
+- **Monthly Cost:** Stay within free tier
+- **Error Rate:** Reduced to ~20%
+
+### Short-term (Week 2)
+- **Writes Reduction:** 80-90%
+- **Daily Writes:** 9,000-18,000 (9-18% of limit)
+- **Bot Traffic:** 70% reduction
+- **Error Rate:** Reduced to ~10%
+
+### Long-term (Week 3-4)
+- **Writes Reduction:** 95%+
+- **Daily Writes:** <5,000 (<5% of limit)
+- **Sustainable:** Can scale to 100x current traffic
+- **Error Rate:** <5%
+
+---
+
+## Cost Analysis
+
+### Current State (Free Tier)
+- **DO Writes:** ~90,000/day (90% of limit)
+- **Risk:** Approaching paid tier
+- **Paid tier starts at:** $5/month + $0.20/million writes
+- **Potential monthly cost:** $5 + (90k × 30 × $0.20/1M) = ~$5.54/month
+
+### After Optimization (Free Tier)
+- **DO Writes:** <5,000/day (<5% of limit)
+- **Risk:** Comfortable margin
+- **Monthly cost:** $0 (stay in free tier)
+- **Headroom:** Can handle 20x growth
+
+---
+
+## Monitoring & Alerts
+
+### Key Metrics to Track
+1. **DO Write Count:** Daily writes to storage
+2. **Session Reuse Rate:** % of requests with existing sessionId
+3. **Bot Traffic:** % of requests blocked/challenged
+4. **Error Rate:** % of failed requests
+5. **Response Time:** P50, P95, P99 latencies
+
+### Recommended Alerts
+- ⚠️ Warning: >70% of daily write limit
+- 🚨 Critical: >85% of daily write limit
+- 📊 Weekly: Session reuse rate report
+- 🤖 Weekly: Bot traffic analysis
+
+---
+
+## Alternative Approaches
+
+### Option A: Stateless MCP Server (No DOs)
+**Pros:**
+- Zero DO writes
+- Unlimited scaling
+- Lower costs
+
+**Cons:**
+- No session persistence
+- Cannot use SSE transport properly
+- Requires architecture change
+
+### Option B: Upgrade to Paid Plan
+**Pros:**
+- Immediate relief
+- No code changes
+
+**Cons:**
+- Ongoing costs (~$5-10/month)
+- Doesn't fix root cause
+- Will scale with traffic
+
+### Option C: Hybrid Approach (Recommended)
+**Pros:**
+- Best of both worlds
+- Use DOs only for active sessions
+- Use KV for metadata
+
+**Cons:**
+- More complex implementation
+- Requires Phase 3 work
+
+---
+
+## Questions for Discussion
+
+1. **Bot Management:** Do you have a Cloudflare paid plan? (Bot Management requires Pro+)
+Answer: Nope.
+2. **Authentication:** Do you want to add API key authentication?
+Answer: Nope.
+3. **Monitoring:** Do you have access to Cloudflare Analytics Engine?
+Answer: Yes.
+4. **Timeline:** Which phase should we prioritize first?
+Answer: Phase 1.
+5. **Users:** Are most of your users using Claude Desktop, Cursor, or other clients?
+Answer: Cursor.
+---
+
+## References
+
+- [Cloudflare Durable Objects Docs](https://developers.cloudflare.com/durable-objects/)
+- [Durable Objects Limits](https://developers.cloudflare.com/durable-objects/platform/limits/)
+- [Cloudflare Bot Management](https://developers.cloudflare.com/bots/)
+- [Workers Rate Limiting](https://developers.cloudflare.com/workers/examples/rate-limiting/)
+- [agents Library](https://github.com/cloudflare/agents)
diff --git a/mcp-server/package-lock.json b/mcp-server/package-lock.json
index 11b78c7..1811866 100644
--- a/mcp-server/package-lock.json
+++ b/mcp-server/package-lock.json
@@ -1,12 +1,12 @@
 {
-  "name": "remote-mcp-server-authless",
-  "version": "0.0.0",
+  "name": "10x-rules-mcp-server",
+  "version": "0.0.1",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "remote-mcp-server-authless",
-      "version": "0.0.0",
+      "name": "10x-rules-mcp-server",
+      "version": "0.0.1",
       "dependencies": {
         "@modelcontextprotocol/sdk": "^1.7.0",
         "agents": "^0.0.65",
diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts
index a216e91..b3e5b27 100644
--- a/mcp-server/src/index.ts
+++ b/mcp-server/src/index.ts
@@ -40,6 +40,75 @@ export class MyMCP extends McpAgent {
 	}
 }
 
+// Rate Limiter Implementation
+// Uses in-memory Map to track requests per IP
+// Automatically cleans up old entries to prevent memory leaks
+class RateLimiter {
+	private requests: Map<string, number[]> = new Map();
+	private readonly maxRequests: number;
+	private readonly windowMs: number;
+
+	constructor(maxRequests = 10, windowMs = 60000) {
+		this.maxRequests = maxRequests;
+		this.windowMs = windowMs;
+	}
+
+	check(ip: string): boolean {
+		const now = Date.now();
+		const timestamps = this.requests.get(ip) || [];
+
+		// Remove timestamps outside the current window
+		const validTimestamps = timestamps.filter(ts => now - ts < this.windowMs);
+
+		// Check if limit exceeded
+		if (validTimestamps.length >= this.maxRequests) {
+			return false;
+		}
+
+		// Add current timestamp
+		validTimestamps.push(now);
+		this.requests.set(ip, validTimestamps);
+
+		// Periodically clean up old entries to prevent memory leaks
+		if (this.requests.size > 1000) {
+			this.cleanup(now);
+		}
+
+		return true;
+	}
+
+	private cleanup(now: number): void {
+		for (const [ip, timestamps] of this.requests.entries()) {
+			const validTimestamps = timestamps.filter(ts => now - ts < this.windowMs);
+			if (validTimestamps.length === 0) {
+				this.requests.delete(ip);
+			} else {
+				this.requests.set(ip, validTimestamps);
+			}
+		}
+	}
+}
+
+// Global rate limiter instance
+const rateLimiter = new RateLimiter(10, 60000); // 10 requests per minute
+
+// Request validation helpers
+function validateSSERequest(request: Request): { valid: boolean; error?: string } {
+	// Check Accept header
+	const accept = request.headers.get("Accept") || "";
+	if (!accept.includes("text/event-stream") && !accept.includes("*/*")) {
+		return { valid: false, error: "Invalid Accept header. Must accept text/event-stream" };
+	}
+
+	// Check User-Agent (block empty or suspicious)
+	const userAgent = request.headers.get("User-Agent") || "";
+	if (!userAgent || userAgent.length < 5) {
+		return { valid: false, error: "Invalid or missing User-Agent" };
+	}
+
+	return { valid: true };
+}
+
 // Define more specific types for Env and ExecutionContext if known for the environment
 // Example for Cloudflare Workers:
 // interface Env { /* ... bindings ... */ }
@@ -48,9 +117,73 @@ export default {
 	fetch(request: Request, env: Env, ctx: ExecutionContext) {
 		const url = new URL(request.url);
 
+		// Phase 1.2: Health Check Endpoint (no DO creation)
+		if (url.pathname === "/health" || url.pathname === "/") {
+			return new Response(JSON.stringify({
+				status: "ok",
+				version: "1.0.0",
+				server: "MCP Rules Server"
+			}), {
+				headers: {
+					"Content-Type": "application/json",
+					"Cache-Control": "public, max-age=60"
+				}
+			});
+		}
+
+		// Phase 1.1: Rate Limiting (before DO creation)
+		const ip = request.headers.get("CF-Connecting-IP") ||
+			request.headers.get("X-Forwarded-For") ||
+			"unknown";
+
+		if (!rateLimiter.check(ip)) {
+			return new Response(JSON.stringify({
+				error: "Rate limit exceeded",
+				message: "Too many requests. Please try again later.",
+				retryAfter: 60
+			}), {
+				status: 429,
+				headers: {
+					"Content-Type": "application/json",
+					"Retry-After": "60"
+				}
+			});
+		}
+
+		// Phase 1.2: Request Validation (for SSE endpoints)
 		if (url.pathname === "/sse" || url.pathname === "/sse/message") {
+			const validation = validateSSERequest(request);
+			if (!validation.valid) {
+				return new Response(JSON.stringify({
+					error: "Invalid request",
+					message: validation.error
+				}), {
+					status: 400,
+					headers: { "Content-Type": "application/json" }
+				});
+			}
+
+			// Phase 1.4: Extract sessionId for session reuse tracking
+			const sessionId = url.searchParams.get("sessionId");
+
 			// @ts-expect-error - env is not typed
-			return MyMCP.serveSSE("/sse").fetch(request, env, ctx);
+			const response = MyMCP.serveSSE("/sse").fetch(request, env, ctx);
+
+			// Return response with session reuse headers
+			return response.then((res) => {
+				// If this is a new session (no sessionId provided), add headers to encourage reuse
+				if (!sessionId && res.status === 200) {
+					const newHeaders = new Headers(res.headers);
+					newHeaders.set("X-Session-Reuse", "Please save sessionId from URL and reuse for reconnections");
+					newHeaders.set("X-Session-Info", "Reusing sessions reduces server load and improves performance");
+					return new Response(res.body, {
+						status: res.status,
+						statusText: res.statusText,
+						headers: newHeaders
+					});
+				}
+				return res;
+			});
 		}
 
 		if (url.pathname === "/mcp") {
@@ -58,6 +191,12 @@ export default {
 			return MyMCP.serve("/mcp").fetch(request, env, ctx);
 		}
 
-		return new Response("Not found", { status: 404 });
+		return new Response(JSON.stringify({
+			error: "Not found",
+			availableEndpoints: ["/health", "/sse", "/mcp"]
+		}), {
+			status: 404,
+			headers: { "Content-Type": "application/json" }
+		});
 	},
 };