Skip to content

Commit f01d3d0

Browse files
author
StackMemory Bot (CLI)
committed
feat(compact): auto-compact at 95% of model token limit
CompactionHandler now derives its thresholds from the model's context window:
- Warning at 90%, critical/auto-compact at 95%
- Accepts a model name or an explicit limit; defaults to 200K
- Dashboard uses the dynamic limit via getModelTokenLimit()
1 parent 41c65e8 commit f01d3d0

3 files changed

Lines changed: 68 additions & 4 deletions

File tree

src/cli/commands/dashboard.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import { FrameManager } from '../../core/context/index.js';
1010
import Database from 'better-sqlite3';
1111
import { join } from 'path';
1212
import { existsSync } from 'fs';
13+
import { getModelTokenLimit } from '../../core/models/model-router.js';
1314

1415
/** Frame statistics row */
1516
interface FrameStatsRow {
@@ -280,7 +281,7 @@ async function estimateContextUsage(db: Database): Promise<number> {
280281
// Rough estimate: assume average token is 4 bytes
281282
const totalBytes = (result?.input_size || 0) + (result?.output_size || 0);
282283
const estimatedTokens = totalBytes / 4;
283-
const maxTokens = 128000; // Claude's context window
284+
const maxTokens = getModelTokenLimit(process.env.ANTHROPIC_MODEL);
284285

285286
return Math.round((estimatedTokens / maxTokens) * 100);
286287
}

src/core/context/enhanced-rehydration.ts

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ import * as path from 'path';
1111
import { logger } from '../monitoring/logger.js';
1212
import { FrameManager } from './index.js';
1313
import type { Anchor, Event } from './index.js';
14+
import {
15+
getModelTokenLimit,
16+
DEFAULT_MODEL_TOKEN_LIMIT,
17+
} from '../models/model-router.js';
1418

1519
// ============================================================================
1620
// Compaction Handler Types
@@ -72,17 +76,40 @@ export class CompactionHandler {
7276
private metrics: CompactionMetrics;
7377
private tokenAccumulator: number = 0;
7478
private preservedAnchors: Map<string, CriticalContextAnchor> = new Map();
79+
private modelTokenLimit: number;
7580

76-
constructor(frameManager: FrameManager) {
81+
/**
82+
* @param frameManager - Frame manager instance
83+
* @param modelOrLimit - Model name string (looked up in MODEL_TOKEN_LIMITS)
84+
* or explicit numeric token limit.
85+
* Defaults to DEFAULT_MODEL_TOKEN_LIMIT (200K).
86+
*
87+
* Thresholds are derived from the model limit:
88+
* warning = 90% of limit
89+
* critical = 95% of limit (auto-compact trigger)
90+
*/
91+
constructor(frameManager: FrameManager, modelOrLimit?: string | number) {
  this.frameManager = frameManager;

  if (typeof modelOrLimit === 'number') {
    // An explicit limit must be a positive, finite token count. Anything
    // else (NaN, Infinity, 0, negatives) would yield NaN or non-positive
    // thresholds below, so fall back to the default context window instead.
    this.modelTokenLimit =
      Number.isFinite(modelOrLimit) && modelOrLimit > 0
        ? modelOrLimit
        : DEFAULT_MODEL_TOKEN_LIMIT;
  } else {
    // A model name (or undefined) is resolved through the model-router
    // helper, which itself defaults when the model is missing or unknown.
    this.modelTokenLimit = getModelTokenLimit(modelOrLimit);
  }

  // Thresholds are fractions of the resolved limit:
  // warning at 90%, critical (auto-compact trigger) at 95%.
  this.metrics = {
    estimatedTokens: 0,
    warningThreshold: Math.floor(this.modelTokenLimit * 0.9),
    criticalThreshold: Math.floor(this.modelTokenLimit * 0.95),
    anchorsPreserved: 0,
  };
}
85105

106+
/**
107+
* Get the resolved model token limit
108+
*/
109+
getModelTokenLimit(): number {
110+
// Returns the limit stored in modelTokenLimit, which the constructor sets
// from either the model name or the explicit numeric limit it was given.
return this.modelTokenLimit;
111+
}
112+
86113
/**
87114
* Track token usage from a message
88115
*/

src/core/models/model-router.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,42 @@ export type ModelProvider =
2020
| 'custom';
2121
export type TaskType = 'default' | 'plan' | 'think' | 'code' | 'review';
2222

23+
/**
24+
* Known context window sizes (max tokens) for popular models.
25+
* Used by CompactionHandler to compute auto-compact thresholds.
26+
*/
27+
export const MODEL_TOKEN_LIMITS: Record<string, number> = {
  // Claude 4.x / 4.5 / 4.6
  'claude-opus-4-6': 200000,
  'claude-sonnet-4-5-20250929': 200000,
  'claude-haiku-4-5-20251001': 200000,
  'claude-sonnet-4-20250514': 200000,
  // Claude 3.x
  'claude-3-5-sonnet-20241022': 200000,
  'claude-3-5-haiku-20241022': 200000,
  'claude-3-opus-20240229': 200000,
  // OpenAI
  'gpt-4o': 128000,
  'gpt-4-turbo': 128000,
  'gpt-4': 8192,
  o1: 200000,
  'o3-mini': 200000,
  // Qwen
  'qwen3-max-2025-01-23': 128000,
};

/** Default context window when model is unknown */
export const DEFAULT_MODEL_TOKEN_LIMIT = 200000;

/**
 * Get the token limit for a model name.
 *
 * Falls back to DEFAULT_MODEL_TOKEN_LIMIT when no model is given (undefined
 * or empty string) or when the name is not a key of MODEL_TOKEN_LIMITS.
 *
 * @param model - Exact model identifier to look up.
 * @returns The context window size in tokens; always a number.
 */
export function getModelTokenLimit(model?: string): number {
  if (!model) return DEFAULT_MODEL_TOKEN_LIMIT;

  // Only keys defined directly on the table count as known models. A plain
  // index lookup would also resolve inherited Object.prototype members
  // (e.g. "toString", "constructor", "__proto__") and return a non-number.
  if (!Object.prototype.hasOwnProperty.call(MODEL_TOKEN_LIMITS, model)) {
    return DEFAULT_MODEL_TOKEN_LIMIT;
  }

  return MODEL_TOKEN_LIMITS[model] ?? DEFAULT_MODEL_TOKEN_LIMIT;
}
58+
2359
export interface ModelConfig {
2460
provider: ModelProvider;
2561
model: string;

0 commit comments

Comments
 (0)