feat(compact): auto-compact at 95% of model token limit

StackMemory Bot (CLI) · StackMemory Bot (CLI) · commit f01d3d062126 · 2026-02-13T08:40:50.000-05:00
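CompactionHandler now derives its thresholds from the model's context window:
- Warning at 90% of the limit, critical/auto-compact at 95%
- Accepts a model name or an explicit numeric limit; defaults to 200K tokens
- Dashboard replaces its fixed 128K limit with a dynamic one from
  getModelTokenLimit(), keyed on the ANTHROPIC_MODEL environment variable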
diff --git a/src/cli/commands/dashboard.ts b/src/cli/commands/dashboard.ts
@@ -10,6 +10,7 @@ import { FrameManager } from '../../core/context/index.js';
 import Database from 'better-sqlite3';
 import { join } from 'path';
 import { existsSync } from 'fs';
+import { getModelTokenLimit } from '../../core/models/model-router.js';
 
 /** Frame statistics row */
 interface FrameStatsRow {
@@ -280,7 +281,7 @@ async function estimateContextUsage(db: Database): Promise<number> {
   // Rough estimate: assume average token is 4 bytes
   const totalBytes = (result?.input_size || 0) + (result?.output_size || 0);
   const estimatedTokens = totalBytes / 4;
-  const maxTokens = 128000; // Claude's context window
+  const maxTokens = getModelTokenLimit(process.env.ANTHROPIC_MODEL);
 
   return Math.round((estimatedTokens / maxTokens) * 100);
 }
diff --git a/src/core/context/enhanced-rehydration.ts b/src/core/context/enhanced-rehydration.ts
@@ -11,6 +11,10 @@ import * as path from 'path';
 import { logger } from '../monitoring/logger.js';
 import { FrameManager } from './index.js';
 import type { Anchor, Event } from './index.js';
+import {
+  getModelTokenLimit,
+  DEFAULT_MODEL_TOKEN_LIMIT,
+} from '../models/model-router.js';
 
 // ============================================================================
 // Compaction Handler Types
@@ -72,17 +76,40 @@ export class CompactionHandler {
   private metrics: CompactionMetrics;
   private tokenAccumulator: number = 0;
   private preservedAnchors: Map<string, CriticalContextAnchor> = new Map();
+  private modelTokenLimit: number;
 
-  constructor(frameManager: FrameManager) {
+  /**
+   * Thresholds: warning = 90%, critical (auto-compact) = 95% of limit, floored.
+   * @param frameManager - Frame manager instance
+   * @param modelOrLimit - Model name (resolved via getModelTokenLimit) or an
+   *   explicit token limit; an omitted name, or a limit that is not a finite
+   *   positive number, resolves to DEFAULT_MODEL_TOKEN_LIMIT.
+   */
+  constructor(frameManager: FrameManager, modelOrLimit?: string | number) {
     this.frameManager = frameManager;
+    if (typeof modelOrLimit !== 'number') {
+      this.modelTokenLimit = getModelTokenLimit(modelOrLimit);
+    } else {
+      // NaN, Infinity, zero or negative limits would yield unusable thresholds.
+      const usable = Number.isFinite(modelOrLimit) && modelOrLimit > 0;
+      this.modelTokenLimit = usable ? modelOrLimit : DEFAULT_MODEL_TOKEN_LIMIT;
+    }
+
     this.metrics = {
       estimatedTokens: 0,
-      warningThreshold: 150000, // 150K tokens
-      criticalThreshold: 170000, // 170K tokens
+      warningThreshold: Math.floor(this.modelTokenLimit * 0.9),
+      criticalThreshold: Math.floor(this.modelTokenLimit * 0.95),
       anchorsPreserved: 0,
     };
   }
 
+  /** Token limit resolved by the constructor. */
+  getModelTokenLimit(): number {
+    const { modelTokenLimit } = this;
+
+    return modelTokenLimit;
+  }
+
   /**
    * Track token usage from a message
    */
diff --git a/src/core/models/model-router.ts b/src/core/models/model-router.ts
@@ -20,6 +20,42 @@ export type ModelProvider =
   | 'custom';
 export type TaskType = 'default' | 'plan' | 'think' | 'code' | 'review';
 
+/**
+ * Context window size, in tokens, for each model identifier listed here.
+ * CompactionHandler uses these limits to derive its auto-compact thresholds.
+ */
+export const MODEL_TOKEN_LIMITS: Record<string, number> = {
+  // Anthropic: Claude 4 generation (4, 4.5, 4.6)
+  'claude-opus-4-6': 200_000,
+  'claude-sonnet-4-5-20250929': 200_000,
+  'claude-haiku-4-5-20251001': 200_000,
+  'claude-sonnet-4-20250514': 200_000,
+  // Anthropic: Claude 3 generation
+  'claude-3-5-sonnet-20241022': 200_000,
+  'claude-3-5-haiku-20241022': 200_000,
+  'claude-3-opus-20240229': 200_000,
+  // OpenAI
+  'gpt-4o': 128_000,
+  'gpt-4-turbo': 128_000,
+  'gpt-4': 8_192,
+  o1: 200_000,
+  'o3-mini': 200_000,
+  // Alibaba Qwen
+  'qwen3-max-2025-01-23': 128_000,
+};
+
+/** Fallback context window (tokens) for an unset or unlisted model. */
+export const DEFAULT_MODEL_TOKEN_LIMIT = 200_000;
+
+/** Token limit for a model name, or the default when unset or unlisted. */
+export function getModelTokenLimit(model?: string): number {
+  if (!model) return DEFAULT_MODEL_TOKEN_LIMIT;
+  // Own keys only: "constructor" etc. must not resolve via Object.prototype.
+  const listed = Object.keys(MODEL_TOKEN_LIMITS).includes(model);
+  const limit = listed ? MODEL_TOKEN_LIMITS[model] : undefined;
+  return limit ?? DEFAULT_MODEL_TOKEN_LIMIT;
+}
+
 export interface ModelConfig {
   provider: ModelProvider;
   model: string;