Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bun.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DATA TYPE boolean;--> statement-breakpoint
ALTER TABLE "meter_snapshots" ALTER COLUMN "success" DROP DEFAULT;--> statement-breakpoint
ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DATA TYPE boolean USING (success::integer::boolean);--> statement-breakpoint
ALTER TABLE "meter_snapshots" ALTER COLUMN "success" SET DEFAULT true;
1 change: 1 addition & 0 deletions packages/backend/drizzle/schema/postgres/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ export const providers = pgTable(
quotaCheckerEnabled: boolean('quota_checker_enabled').notNull().default(true),
quotaCheckerInterval: integer('quota_checker_interval').notNull().default(30),
quotaCheckerOptions: text('quota_checker_options'), // JSON or encrypted string
quotaCheckerDisableCooldown: boolean('quota_checker_disable_cooldown').notNull().default(false),
// GPU Profile settings — display hint + resolved numeric params
// gpu_profile is kept as a display hint; the 4 numeric fields are the source of truth.
gpuProfile: text('gpu_profile'), // GPU profile name (e.g. 'H100', 'custom') — display hint only
Expand Down
1 change: 1 addition & 0 deletions packages/backend/drizzle/schema/sqlite/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export const providers = sqliteTable(
quotaCheckerEnabled: integer('quota_checker_enabled').notNull().default(1),
quotaCheckerInterval: integer('quota_checker_interval').notNull().default(30),
quotaCheckerOptions: text('quota_checker_options'), // JSON
quotaCheckerDisableCooldown: integer('quota_checker_disable_cooldown').notNull().default(0),
// GPU Profile settings — display hint + resolved numeric params
// gpu_profile is kept as a display hint; the 4 numeric fields are the source of truth.
gpuProfile: text('gpu_profile'), // GPU profile name (e.g. 'H100', 'custom') — display hint only
Expand Down
2 changes: 1 addition & 1 deletion packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"@fastify/multipart": "^10.0.0",
"@fastify/static": "^9.1.3",
"@google/genai": "^1.50.1",
"@mariozechner/pi-ai": "0.70.5",
"@mariozechner/pi-ai": "0.70.6",
"@plexus/shared": "workspace:*",
"@sinclair/typebox": "^0.34.49",
"dotenv": "^17.4.2",
Expand Down
134 changes: 46 additions & 88 deletions packages/backend/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,159 +196,108 @@ const PoeQuotaCheckerOptionsSchema = z.object({
endpoint: z.string().url().optional(),
});

/**
 * Fields shared by every quota checker variant. The per-provider variants of
 * the `type`-discriminated quota checker union extend this schema with their
 * own `type` literal and `options` schema.
 */
const QuotaCheckerBaseSchema = z.object({
  /** Whether the checker is enabled; defaults to true when omitted. */
  enabled: z.boolean().default(true),
  /** Check interval in minutes; minimum 1, defaults to 30 when omitted. */
  intervalMinutes: z.number().min(1).default(30),
  /** Optional explicit checker id; trimmed, and must be non-empty when given. */
  id: z.string().trim().min(1).optional(),
  /**
   * When true, the quota scheduler will not inject a provider-wide cooldown
   * when utilization exceeds the exhaustion threshold. Quota data is still
   * fetched and persisted. Circuit-breaker cooldowns (from failures/429s)
   * are unaffected — use the provider-level disable_cooldown for those.
   * Defaults to false (current behaviour).
   */
  // `.default(false)` already accepts a missing value, so a preceding
  // `.optional()` adds nothing; this matches the other defaulted fields above.
  disable_quota_cooldown: z.boolean().default(false),
});

/**
 * Quota checker configuration for a provider, modelled as a union
 * discriminated on the `type` literal. Every variant carries the shared
 * checker fields (`enabled`, `intervalMinutes`, `id`) plus an `options`
 * entry validated by that checker's own options schema.
 *
 * NOTE(review): as rendered here each variant shows both a `z.object({`
 * opener with inline `enabled` / `intervalMinutes` / `id` fields and a
 * `QuotaCheckerBaseSchema.extend({` opener. This looks like removed and
 * added diff lines shown together — confirm that only the `.extend({` form
 * (inheriting the shared fields from QuotaCheckerBaseSchema) remains in the
 * committed file.
 */
const ProviderQuotaCheckerSchema = z.discriminatedUnion('type', [
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('naga'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: NagaQuotaCheckerOptionsSchema.optional(),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('synthetic'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: SyntheticQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('nanogpt'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: NanoGPTQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('zai'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: ZAIQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('moonshot'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: MoonshotQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('novita'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: NovitaQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('minimax'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: MiniMaxQuotaCheckerOptionsSchema,
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('openrouter'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: OpenRouterQuotaCheckerOptionsSchema,
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('kilo'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: KiloQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('openai-codex'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: OpenAICodexQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('kimi-code'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: KimiCodeQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('claude-code'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: ClaudeCodeQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('copilot'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: CopilotQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('wisdomgate'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: WisdomGateQuotaCheckerOptionsSchema.optional(),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('apertis'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: ApertisQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('minimax-coding'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: MiniMaxCodingQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('poe'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: PoeQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('gemini-cli'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: GeminiCliQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('antigravity'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: AntigravityQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('neuralwatt'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: NeuralwattQuotaCheckerOptionsSchema.optional().default({}),
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('ollama'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: OllamaQuotaCheckerOptionsSchema,
}),
z.object({
QuotaCheckerBaseSchema.extend({
type: z.literal('zenmux'),
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
id: z.string().trim().min(1).optional(),
options: ZenmuxQuotaCheckerOptionsSchema.optional(),
}),
]);
Expand Down Expand Up @@ -560,6 +509,14 @@ const QuotaConfigSchema = z.object({
enabled: z.boolean().default(true),
intervalMinutes: z.number().min(1).default(30),
options: z.record(z.string(), z.any()).default({}),
/**
* When true, the quota scheduler will not inject a provider-wide cooldown
* when utilization exceeds the exhaustion threshold. Quota data is still
* fetched and persisted. Circuit-breaker cooldowns (from failures/429s)
* are unaffected — use the provider-level disable_cooldown for those.
* Defaults to false (current behaviour).
*/
disableQuotaCooldown: z.boolean().default(false),
});

export const McpServerConfigSchema = z.object({
Expand Down Expand Up @@ -829,6 +786,7 @@ function buildProviderQuotaConfigs(config: z.infer<typeof RawPlexusConfigSchema>
enabled: true,
intervalMinutes: quotaChecker.intervalMinutes,
options,
disableQuotaCooldown: quotaChecker.disable_quota_cooldown === true,
});
}

Expand Down
2 changes: 2 additions & 0 deletions packages/backend/src/db/config-repository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ export class ConfigRepository {
quotaCheckerOptions: config.quota_checker?.options
? encryptJsonField(config.quota_checker.options)
: null,
quotaCheckerDisableCooldown: fromBool(config.quota_checker?.disable_quota_cooldown === true),
// GPU Profile settings for inference energy calculation
gpuProfile: config.gpu_profile ?? null,
gpuRamGb: config.gpu_ram_gb ?? null,
Expand Down Expand Up @@ -442,6 +443,7 @@ export class ConfigRepository {
type: row.quotaCheckerType,
enabled: toBool(row.quotaCheckerEnabled),
intervalMinutes: row.quotaCheckerInterval,
disable_quota_cooldown: toBool(row.quotaCheckerDisableCooldown),
...(row.quotaCheckerId ? { id: row.quotaCheckerId } : {}),
...(row.quotaCheckerOptions ? { options: decryptJsonField(row.quotaCheckerOptions) } : {}),
};
Expand Down
1 change: 1 addition & 0 deletions packages/backend/src/services/config-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ export class ConfigService {
enabled: true,
intervalMinutes: quotaChecker.intervalMinutes,
options,
disableQuotaCooldown: quotaChecker.disable_quota_cooldown === true,
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ import type { QuotaConfig } from '../../../config';
const CHECKER_ID = 'quota-persistence-checker';

const makeConfig = (
overrides: Partial<{ maxUtilizationPercent: number }> & { id?: string; provider?: string } = {}
overrides: Partial<{ maxUtilizationPercent: number; disableQuotaCooldown: boolean }> & {
id?: string;
provider?: string;
} = {}
): QuotaConfig => ({
id: overrides.id ?? CHECKER_ID,
provider: overrides.provider ?? 'test-provider',
type: 'synthetic',
enabled: true,
intervalMinutes: 60,
disableQuotaCooldown: overrides.disableQuotaCooldown ?? false,
options: {
...(overrides.maxUtilizationPercent !== undefined
? { maxUtilizationPercent: overrides.maxUtilizationPercent }
Expand Down Expand Up @@ -279,3 +283,76 @@ describe('QuotaScheduler maxUtilizationPercent', () => {
expect(isHealthy).toBe(false);
});
});

/**
 * Covers the per-checker `disableQuotaCooldown` flag on QuotaScheduler:
 * - with the flag set, applying a 100% utilization result leaves the provider
 *   healthy;
 * - with the flag unset, applying a 99% utilization result marks the provider
 *   unhealthy;
 * - the flag does not stop meter snapshots from being persisted.
 */
describe('QuotaScheduler disableQuotaCooldown', () => {
  const PROVIDER = 'disable-quota-cooldown-test-provider';

  beforeEach(async () => {
    await closeDatabase();
    // Prefer the dedicated test database URL. Only write it back to the
    // environment when one is actually available: assigning `undefined` to a
    // `process.env` property stores the literal string "undefined", which
    // would then be handed to initializeDatabase as if it were a real URL.
    const databaseUrl = process.env.PLEXUS_TEST_DB_URL ?? process.env.DATABASE_URL;
    if (databaseUrl !== undefined) {
      process.env.DATABASE_URL = databaseUrl;
    }
    initializeDatabase(databaseUrl);
    await runMigrations();

    // Start every test from an empty meter_snapshots table.
    const db = getDatabase() as any;
    const schema = getSchema() as any;
    await db.delete(schema.meterSnapshots);
  });

  afterEach(async () => {
    QuotaScheduler.getInstance().stop();
    // Clear any cooldown a test injected so it cannot leak into the next one.
    const cooldownManager = CooldownManager.getInstance();
    await cooldownManager.markProviderSuccess(PROVIDER, '');
    await closeDatabase();
  });

  it('does not inject a quota cooldown when disableQuotaCooldown is true, even at 100% utilization', async () => {
    // `as any` gives access to the scheduler's internal `configs` map and
    // `applyCooldownsFromResult` method.
    const scheduler = QuotaScheduler.getInstance() as any;
    const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: true });
    scheduler.configs.set('no-quota-cooldown-checker', config);

    await scheduler.applyCooldownsFromResult(
      makeMeterResult(100, 'no-quota-cooldown-checker', PROVIDER),
      config
    );

    const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, '');
    expect(isHealthy).toBe(true);
  });

  it('still injects a cooldown when disableQuotaCooldown is false (default)', async () => {
    const scheduler = QuotaScheduler.getInstance() as any;
    const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: false });
    scheduler.configs.set('with-quota-cooldown-checker', config);

    await scheduler.applyCooldownsFromResult(
      makeMeterResult(99, 'with-quota-cooldown-checker', PROVIDER),
      config
    );

    const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, '');
    expect(isHealthy).toBe(false);
  });

  it('disableQuotaCooldown does not affect quota data persistence', async () => {
    const scheduler = QuotaScheduler.getInstance() as any;
    const config = makeConfig({ provider: PROVIDER, disableQuotaCooldown: true });
    scheduler.configs.set('no-quota-cooldown-persist-checker', config);

    // Persist first, then run the cooldown step on the same result.
    const result = makeMeterResult(100, 'no-quota-cooldown-persist-checker', PROVIDER);
    await scheduler.persistResult(result);
    await scheduler.applyCooldownsFromResult(result, config);

    const db = getDatabase() as any;
    const schema = getSchema() as any;
    const rows = await db
      .select()
      .from(schema.meterSnapshots)
      .where(eq(schema.meterSnapshots.checkerId, 'no-quota-cooldown-persist-checker'));

    // Meter data was still persisted
    expect(rows.length).toBeGreaterThan(0);
    // But no cooldown was injected
    const isHealthy = await CooldownManager.getInstance().isProviderHealthy(PROVIDER, '');
    expect(isHealthy).toBe(true);
  });
});
Loading
Loading