diff --git a/.env.example b/.env.example index 102f5c5fa..d1e384e07 100644 --- a/.env.example +++ b/.env.example @@ -74,6 +74,8 @@ CSRF_SECRET= # - Session 追踪:5 分钟上下文缓存优化(避免频繁切换供应商) # - Fail Open 策略:Redis 不可用时自动降级,不影响服务可用性 ENABLE_RATE_LIMIT=true # 是否启用限流功能(默认:true) +ENABLE_MODEL_RATE_LIMIT=false # 是否启用按模型维度限额(默认:false;依赖 ENABLE_RATE_LIMIT=true) +MODEL_RATE_LIMIT_FAIL_OPEN=true # 按模型限额在 Redis 故障时是否 fail-open(默认:true,与主线一致) REDIS_URL=redis://localhost:6379 # Redis 连接地址(Docker 部署使用 redis://redis:6379,支持 rediss:// TLS) REDIS_TLS_REJECT_UNAUTHORIZED=true # 是否验证 Redis TLS 证书(默认:true) # 设置为 false 可跳过证书验证,用于自签证书或共享证书场景 diff --git a/CHANGELOG.md b/CHANGELOG.md index 185a5d302..5149509d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ --- +## 未发布 (Unreleased) + +### 新增 + +- 用户组 × 模型组限额(Group Rate Limit):将「按模型限额」重构为两维度模型——**模型组**(一组模型,全局互斥归属)× **限额主体**(用户 / 用户组 / 密钥),可为每个 (主体 × 模型组) 设置 5 小时/每日/每周/每月/总额成本上限。多来源(个人行 + 用户组上限)按**取最大值**合并,用户组限额为**人均上限**。支持**临时提额**授予(按用户 × 模型组 × 窗口,带有效期,到点即时生效/失效,叠加在有效上限之上)。 + - **完全切分**:命中某轴(用户或密钥)模型组限额后,该轴消费既**跳过**主线全局成本闸门、也**不计入**该轴主线全局额(通过 `usage_ledger` 按轴打标 `counted_in_user_global` / `counted_in_key_global` 实现,DB 聚合、Redis 回填、展示分栏三处同源);RPM 与并发护栏始终生效。Redis 故障时按 `MODEL_RATE_LIMIT_FAIL_OPEN` fail-open,且 fail-open **不**置旁路标记以防双重放行。 + - 新增模块:schema 五表 + 两枚举 + `usage_ledger`/`message_request` 打标两列、解析快照缓存(SWR + pub/sub 失效)、桶 lease 计量、guard 接入、模型组/用户组/限额/提额 Admin REST API、Dashboard 管理界面(模型组、用户组、按模型限额含提额内嵌),5 语言 i18n。 + - 通过 `ENABLE_MODEL_RATE_LIMIT` 开关控制,默认关闭,关闭时与主线逐字节一致。提额到点生效为内存精确判定;增删授予最长一个缓存 TTL 后对线上请求生效。 + - 已知后续项:OPT-B 模型维度 lease 百分比(`quotaModelLeasePercent*` / `quotaModelLeaseMinSliceUsd`)当前未配置时回退主线百分比;真实 PG+Redis 的集成/E2E 测试待在具备数据库的环境中补充。 + +--- + ## v0.8.5 (2026-06-08) ### 新增 diff --git a/CLAUDE.md b/CLAUDE.md index 4c628b6a1..266598b26 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,12 +6,13 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - **Source**: https://github.com/ding113/claude-code-hub - 
**PR Target Branch**: `dev` (all pull requests must target the dev branch) +- **Branching & commit conventions**: see @CONTRIBUTING.md (Conventional Commits, `feature/*` / `fix/*` branches, squash-merge to `dev`) ## Critical Rules -1. **No Emoji in Code** - Never use emoji characters in any code, comments, or string literals +1. **No Emoji in Code** - Never use emoji characters in any code, comments, or string literals (verify: `bun run i18n:audit-messages-no-emoji`) 2. **Test Coverage** - All new features must have unit test coverage of at least 80% -3. **i18n Required** - All user-facing strings must use i18n (5 languages supported). Never hardcode display text +3. **i18n Required** - All user-facing strings must use i18n (5 languages supported). Message files live at `messages/<locale>/
<namespace>.json`. Verify placeholders: `bun run i18n:audit-placeholders` 4. **Pre-commit Checklist** - Before committing, always run: ```bash bun run build # Production build @@ -44,6 +45,9 @@ bun run test:ui # Interactive test UI bun run test:coverage # Coverage report bunx vitest run <file> # Run single test file bunx vitest run -t "test name" # Run specific test +bun run test:integration # Run integration tests (separate config) +bun run test:e2e # Run e2e tests (separate config) +bun run test:v1 # API v1 critical-path coverage check # Dev environment (via dev/Makefile) cd dev && make dev # Start all services (PG + Redis + app) @@ -65,6 +69,7 @@ bun run db:generate # Generate Drizzle migrations from schema changes bun run db:migrate # Apply migrations bun run db:push # Push schema changes (dev only) bun run db:studio # Open Drizzle Studio +bun run validate:migrations # Verify generated migration files are consistent ``` ## Architecture Overview @@ -123,6 +128,13 @@ Key components: - **Legacy Management API**: `/api/actions/{module}/{action}` - Deprecated Server Action adapter, retained behind `ENABLE_LEGACY_ACTIONS_API` - **Docs**: `/api/v1/scalar` (Scalar UI), `/api/v1/docs` (Swagger), `/api/v1/openapi.json` - **OpenAPI checks**: `bun run test:v1`, `bun run openapi:check`, `bun run openapi:lint` +- **OpenAPI codegen**: `bun run openapi:generate` regenerates TypeScript types from the OpenAPI schema + +### MCP Servers +Configured in `.mcp.json` — prefer these over reinventing: +- `db` (Bytebase DBHub): introspect Postgres schema/data directly +- `shadcn`: search/install shadcn/ui components into the project +- `chrome-devtools`: browser automation for E2E debugging ## Code Conventions diff --git a/docker-compose.local.yaml b/docker-compose.local.yaml new file mode 100644 index 000000000..fdcf4b429 --- /dev/null +++ b/docker-compose.local.yaml @@ -0,0 +1,7 @@ +services: + app: + image: claude-code-hub:local + environment: + ENABLE_RATE_LIMIT: "true" + ENABLE_MODEL_RATE_LIMIT: 
"true" + AUTO_MIGRATE: "true" diff --git a/docs/api/v1/README.md b/docs/api/v1/README.md index ee5d467b7..f75a8115b 100644 --- a/docs/api/v1/README.md +++ b/docs/api/v1/README.md @@ -18,6 +18,10 @@ traffic can converge without reimplementing business rules. Every response includes `X-API-Version: 1.0.0`. +### Resource guides + +- [Per-Model Limits](./model-limits.md): admin endpoints for per-model cost limits. + ## Authentication The API accepts three credential transports: diff --git a/docs/api/v1/model-limits.md b/docs/api/v1/model-limits.md new file mode 100644 index 000000000..50e4ecd07 --- /dev/null +++ b/docs/api/v1/model-limits.md @@ -0,0 +1,114 @@ +# Per-Model Limits API + +Admin endpoints for managing per-model cost limits scoped to a user or an API +key. These complement the mainline user/key quotas by letting you cap spend on a +single model (or all models via a `*` wildcard) without affecting the shared +account-level budget. + +See the OpenAPI surface for the authoritative schema: + +- OpenAPI JSON: `/api/v1/openapi.json` +- Scalar UI: `/api/v1/scalar` (tag: `Model Limits`) + +## Feature flag + +Per-model limiting is opt-in and is enforced only when both flags are set: + +- `ENABLE_MODEL_RATE_LIMIT=true` (default `false`) +- `ENABLE_RATE_LIMIT=true` (default `true`) + +The management endpoints below are always available to admins regardless of the +flag, so limits can be configured ahead of enabling enforcement. When the flag +is off, configured limits are stored but never evaluated, and the request path +is unchanged. + +## Authentication + +All endpoints require `admin` access (session cookie, opaque session bearer +token, or `ADMIN_TOKEN`; user API keys are rejected unless +`ENABLE_API_KEY_ADMIN_ACCESS=true` for an admin-owned key). Cookie-authenticated +mutations must include the CSRF token from `GET /api/v1/auth/csrf`. + +Errors use the standard `application/problem+json` envelope. 
Notable codes: + +- `model_limit.not_found` (404): the targeted limit row does not exist. +- `model_limit.action_failed` (400): the underlying action rejected the input. +- `auth.forbidden` (403): caller lacks admin access. + +## Endpoints + +| Method | Path | Description | +| --- | --- | --- | +| `GET` | `/api/v1/model-limits/users/{userId}` | List a user's per-model limits | +| `POST` | `/api/v1/model-limits/users/{userId}` | Create or update a user limit (`model` in body) | +| `DELETE` | `/api/v1/model-limits/users/{userId}/{model}` | Delete a user limit | +| `GET` | `/api/v1/model-limits/keys/{keyId}` | List a key's per-model limits | +| `POST` | `/api/v1/model-limits/keys/{keyId}` | Create or update a key limit (`model` in body) | +| `DELETE` | `/api/v1/model-limits/keys/{keyId}/{model}` | Delete a key limit | + +For `DELETE`, URL-encode the `model` path segment. The wildcard `*` is +`%2A` (e.g. `/api/v1/model-limits/keys/42/%2A`). + +### List response + +```json +{ + "items": [ + { + "scopeType": "user", + "scopeId": 7, + "model": "claude-opus-4", + "rpmLimit": null, + "limit5hUsd": 2.5, + "limit5hResetMode": "fixed", + "dailyLimitUsd": 10, + "limitWeeklyUsd": null, + "limitMonthlyUsd": 100, + "limitTotalUsd": null, + "limit5hCostResetAt": null + } + ] +} +``` + +### Upsert body + +```json +{ + "model": "claude-opus-4", + "limit5hUsd": 2.5, + "limit5hResetMode": "fixed", + "dailyLimitUsd": 10, + "limitWeeklyUsd": null, + "limitMonthlyUsd": 100, + "limitTotalUsd": null +} +``` + +- `model` is required (1-128 chars). Use `*` for an all-models fallback. +- Each USD field is optional. Omit a field to leave it unchanged on update; + send `null` to clear it (unlimited for that window). +- `limit5hResetMode` is `fixed` or `rolling` and applies to the 5-hour window. +- `rpmLimit` is reserved for a future release and is not enforced. + +The endpoint upserts on `(scope, model)` and returns the resulting row (HTTP +200). `DELETE` returns HTTP 204 with no body. 
+ +## Resolution semantics + +When a request is evaluated, the most specific matching limit is chosen via a +4-level lookup (first match wins; no stacking): + +1. key + exact model +2. key + `*` +3. user + exact model +4. user + `*` + +If none match, no per-model limit applies and the request continues under the +mainline user/key quotas only. + +Usage is metered on the resolved (post-redirect) model name, consistent with the +`model` column stored in `usage_ledger`. Limits reuse the mainline lease +mechanism (PostgreSQL as the authoritative source, Redis lease slices, atomic +decrement). On Redis failure the limiter fails open by default +(`MODEL_RATE_LIMIT_FAIL_OPEN=true`). diff --git a/docs/limit/README.md b/docs/limit/README.md new file mode 100644 index 000000000..292edf88e --- /dev/null +++ b/docs/limit/README.md @@ -0,0 +1,465 @@ +# Per-Model 限额扩展模块 — 实施计划 + +> [!WARNING] +> **本文档为按模型 (user/key × model) 维度的初版计划(Phase 1-5,未上线)。已被修订版方案取代。** +> 修订版引入「模型组 + 用户组」维度、`取最大值` 合并语义、临时提额,并把命中语义改为「按轴覆盖全局成本限额」, +> 详见 [`group-rate-limit.md`](./group-rate-limit.md)。 +> 由于初版未上线,修订版**废弃**本文所述的 `user_model_limits` / `key_model_limits` 两表, +> 改以 `model_groups` / `model_group_members` / `user_groups` / `model_group_limits` 四表重建(单模型 = 单元素模型组)。 +> 「不破坏原有逻辑」专指 `origin/main` 主线。本文保留作初版基线参考。 + +> 目标:在不破坏社区主线代码的前提下,为现有用户/Key 限额系统增加**按模型维度**的限额能力。 +> 设计原则:新增独立模块(`src/lib/model-rate-limit/` + 对应 schema/API/UI 子路径),**原有限额逻辑零改动**;仅对 guard pipeline 做一次最小化的扩展点开放。 + +--- + +## 1. 
现状摘要 + +现有限额体系(社区主线): + +- **数据层**(`src/drizzle/schema.ts`):`users` / `keys` / `providers` 三张表各自携带一组**周期成本限额**列:`limit_5h_usd` / `daily_limit_usd` / `limit_weekly_usd` / `limit_monthly_usd` / `limit_total_usd`,以及 `rpm_limit`、`limit_concurrent_sessions`、`limit_5h_reset_mode`(fixed/rolling)。 +- **服务层**(`src/lib/rate-limit/`):`RateLimitService` 以 Redis 为主、PG 为兜底,存储 key 形如 `{type}:{id}:cost_{period}_{mode}`;rolling 周期用 ZSET + Lua,fixed 周期用 INCRBYFLOAT。 +- **执行点**(`src/app/v1/_lib/proxy/rate-limit-guard.ts`):在 `CHAT_PIPELINE` 中以 `rateLimit` 步骤注入,先 user 再 key、先 total 再细粒度,违规抛 `RateLimitError`。 +- **Pipeline 注册表**(`src/app/v1/_lib/proxy/guard-pipeline.ts`):`Steps` 是一个**硬编码** record,`CHAT_PIPELINE` 是固定 array。 +- **特性开关**:`ENABLE_RATE_LIMIT` 控制总开关;关闭时 Redis 订阅与计数都跳过。 +- **模型信息**:`session.getCurrentModel()` 返回重定向后的归一化模型名(另有 `getOriginalModel()` 为用户请求的原模型),**在 rateLimit guard 执行前已就绪**(model guard 排在更前)。 +- **限额计数机制**(关键):主线限额**已全面采用 lease 模式**——DB(`usage_ledger`)为权威用量源,Redis 存预扣"切片",原子 Lua 扣减。检查走 `RateLimitService.checkCostLimitsWithLease()`,回填走 `response-handler.ts` 的 `trackCost()` + `decrementLeaseBudget()`(fire-and-forget)。`usage_ledger` 已含 `model` 列(`schema.ts:989`),支持按模型维度的 DB 权威聚合。 + +> 代码核对修正记录(本计划已对照源码核实): +> - `GuardStep` 实际接口为 `{ name; execute(session): Promise<Response | null> }`(`guard-pipeline.ts:23`),违规通过 `throw RateLimitError` 冒泡,**非** `ensure(): void`。 +> - 模型访问器是 `getCurrentModel()` / `getOriginalModel()`,**无** `getModel()`。 +> - 成本回填点是 `response-handler.ts:3939`(`trackCost`)+ `:3970`(`decrementLeaseBudget`),**无 ledger 事件总线**,故 model 计数需在此挂钩(这是一处诚实的主线 diff,原 §14 漏算)。 +> - `Steps` 是封闭联合类型 `Record<GuardStepKey, GuardStep>`,扩展步骤须在 `build()` 解析数组后按 name splice 注入,不能进 key map。 + +**痛点**:现有 5h/daily/weekly/monthly 限额是"用户总额度",无法区分模型。同一用户对 `claude-opus-4` 与 `claude-haiku-4.5` 共享同一桶,价格悬殊导致策略粗糙。 + +--- + +## 2. 
设计概要 + +### 2.1 范围 + +- 维度:`(scope_type, scope_id, model)` — 其中 `scope_type ∈ {user, key}`,model 为归一化字符串。允许通配符 `*` 兜底。 +- **模型计量口径(已决策)**:按 `session.getCurrentModel()`(重定向后的实际服务模型)计量,与 `usage_ledger.model` 存储口径一致,保证 lease 的 DB 权威聚合对齐。 +- 周期:复用现有 5h / daily / weekly / monthly / total 五档,及 fixed/rolling 模式;RPM 与并发**暂不纳入第一版**(避免对热路径写入造成翻倍压力)。 +- **计数机制(已决策)**:复刻主线 **lease 模式**(DB 权威 + Redis 切片 + 原子 Lua 扣减),**不**采用独立 ZSET/STRING 计数(详见 §5)。 +- 评估时机:在现有 `rateLimit` guard **之后** 执行新 `modelRateLimit` guard。即使新 guard 失败,原有用户/key 总额限制仍生效。 +- 失败语义:复用 `RateLimitError`,新增错误码前缀 `MODEL_*`(如 `MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED`)。 + +### 2.2 模块边界 + +| 模块 | 路径 | 是否新建 | +|---|---|---| +| Schema | `src/drizzle/schema.ts` 中 **追加两张新表** `userModelLimits` / `keyModelLimits` | 追加,不改老列 | +| Repository (CRUD) | `src/repository/model-limit.ts` | 新建 | +| Repository (聚合) | 按模型聚合用量:`sumUserCostByModelInTimeRange` / `sumKeyCostByModelInTimeRange`(lease DB 权威源) | 新建(沿用现有 `sumUserCostInTimeRange` 加 `model` 过滤) | +| Service | `src/lib/model-rate-limit/{service,lease,keys,resolver,types}.ts`(复刻 lease,复用 `lib/rate-limit/lease.ts` 纯函数) | 新建 | +| Guard | `src/app/v1/_lib/proxy/model-rate-limit-guard.ts` | 新建 | +| Pipeline 接入 | `guard-pipeline.ts` 扩展钩子(见 §3)| 一次性最小修改 | +| 成本回填挂钩 | `response-handler.ts:3970` 的 `decrementLeaseBudget` 数组内追加 model 维度扣减(见 §7) | 一次性最小修改(flag 守卫) | +| Admin API | `src/app/api/v1/resources/model-limits/{router,handlers}.ts` + `_root/app.ts` 挂载一行 | 新建 + 一行注册 | +| Server Action | `src/actions/model-limit.ts` | 新建 | +| Dashboard UI | `src/app/[locale]/dashboard/quotas/model-limits/` | 新建子路由 | +| i18n | `messages/<locale>/quota.json` 内追加 `modelLimits` 子节 | 追加,不改老 key | +| 启动注册 | `instrumentation.ts` 的 `register()` 内触发扩展注册 | +3 行 | +| Feature flag | `ENABLE_MODEL_RATE_LIMIT` | 新增 env | + +--- + +## 3. 
与 Guard Pipeline 的集成(核心折中点) + +社区主线的 `guard-pipeline.ts` 把所有步骤写死。**已决策采用方案 A**(开放扩展钩子)。方案 B(复制预设 + 入口切换)记录在本节末备查,不再采用。 + +### 方案 A — 一次性开放扩展点(已采用,修正版) + +> 修正:`Steps` 是封闭联合类型 `Record<GuardStepKey, GuardStep>`,`build()` 做 `config.steps.map(k => Steps[k])`。扩展步骤**不能**注入 key map,必须在 `build()` 解析出 `GuardStep[]` 之后、按锚点步骤的 `name` splice 注入。 + +```ts +// guard-pipeline.ts —— 一次性追加(约 25 行) +export interface ExtensionStep { + key: string; // 唯一标识,用于幂等去重(dev 热重载) + step: GuardStep; // 自带 name + execute(session): Promise<Response | null> + insertAfter: GuardStepKey; // 锚点步骤名 +} +const extensions: ExtensionStep[] = []; + +export function registerExtensionStep(ext: ExtensionStep): void { + if (extensions.some((e) => e.key === ext.key)) return; // 幂等 + extensions.push(ext); +} + +// GuardPipelineBuilder.build() 内,解析数组后注入: +static build(config: GuardConfig): GuardPipeline { + const steps: GuardStep[] = config.steps.map((k) => Steps[k]); + for (const ext of extensions) { + const idx = steps.findIndex((s) => s.name === ext.insertAfter); + if (idx >= 0) steps.splice(idx + 1, 0, ext.step); // 锚点不在该 preset 则自动跳过 + } + return { + async run(session) { + for (const s of steps) { + const res = await s.execute(session); + if (res) return res; // early exit + } + return null; + }, + }; +} +``` + +**语义红利**:只有含 `rateLimit` 的 `CHAT_PIPELINE` 才命中锚点;`RAW_PASSTHROUGH_PIPELINE` / `COUNT_TOKENS_PIPELINE` 没有 `rateLimit`,自动不挂 model 限额——无需任何额外判断,天然正确。 + +新模块在 `instrumentation.ts` 的 `register()`(line 243)内触发注册(见 §8),调用 `registerExtensionStep({ key: "modelRateLimit", step: ModelRateLimitGuard, insertAfter: "rateLimit" })`。 + +**优点**:所有未来限额扩展都走同一钩子;主线只动一次,语义清晰。 +**代价**:动了主线一文件(约 +25 行),不能称"零修改",但量极小。重构 `guard-pipeline.ts` 时须保持该公开钩子向后兼容(写一行单测固化)。 + +--- + +#### 备查:方案 B(已否决) + +复制预设 `CHAT_PIPELINE_WITH_MODEL_LIMIT = [...CHAT_PIPELINE, "modelRateLimit"]` + 在 `proxy-handler.ts` 入口按 flag 切换。否决理由:仍需改 `proxy-handler.ts`,且 `modelRateLimit` 仍需进 `Steps`/`GuardStepKey`(封闭联合)才能被预设引用,反而比方案 A 改动更多;叠加未来扩展时复制成本递增。 + +--- + +## 4. 
数据库 Schema + +新增两张表(drizzle 写法),不动 `users` / `keys`: + +```ts +// src/drizzle/schema.ts —— 追加在 keys/users 表后 +export const userModelLimits = pgTable("user_model_limits", { + id: serial("id").primaryKey(), + userId: integer("user_id").notNull().references(() => users.id, { onDelete: "cascade" }), + model: varchar("model", { length: 128 }).notNull(), // "*" = 兜底 + rpmLimit: integer("rpm_limit"), // 预留,第一版不强制 + dailyLimitUsd: numeric("daily_limit_usd", { precision: 10, scale: 2 }), + limit5hUsd: numeric("limit_5h_usd", { precision: 10, scale: 2 }), + limit5hResetMode: dailyResetModeEnum("limit_5h_reset_mode").default("fixed"), + limitWeeklyUsd: numeric("limit_weekly_usd", { precision: 10, scale: 2 }), + limitMonthlyUsd: numeric("limit_monthly_usd", { precision: 10, scale: 2 }), + limitTotalUsd: numeric("limit_total_usd", { precision: 10, scale: 2 }), + limit5hCostResetAt: timestamp("limit_5h_cost_reset_at", { withTimezone: true }), + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ + uniqUserModel: uniqueIndex("user_model_limits_user_model_idx").on(t.userId, t.model), + byUser: index("user_model_limits_user_idx").on(t.userId), +})); + +export const keyModelLimits = pgTable("key_model_limits", { + // 同上结构,外键 keyId -> keys.id +}); +``` + +**迁移流程**:按 CLAUDE.md 规定走 `bun run db:generate` → review 生成文件 → `bun run db:migrate`;生成后立即跑 `bun run validate:migrations`。 + +**冲突解析顺序**(运行时): +1. `keyModelLimits(keyId, model)` 命中 → 用之 +2. 否则 `keyModelLimits(keyId, "*")` 命中 → 用之 +3. 否则 `userModelLimits(userId, model)` 命中 → 用之 +4. 否则 `userModelLimits(userId, "*")` 命中 → 用之 +5. 否则不限制(继续走主线限额) + +--- + +## 5. 
计数机制 — lease 复刻(已决策) + +**决策**:复刻主线 lease 模式,**不**采用独立 ZSET/STRING 计数。理由:(1) 并发安全性已在主线验证(原子 Lua 扣减);(2) `usage_ledger.model` 列已支持 DB 权威按模型聚合,无需另建权威源;(3) 避免独立计数器与 DB 漂移。 + +### 5.1 数据流(与主线一致,加 model 维度) + +``` +检查(guard):读 Redis lease 切片 → 缺失则查 DB 聚合(sumXxxCostByModelInTimeRange) → 切片写回 Redis → 原子 Lua 扣减判断越界 +回填(response-handler):trackCost 落 usage_ledger(已有 model 列)+ decrementLeaseBudget(model 维度) fire-and-forget +``` + +### 5.2 Redis lease key(加 model 段,与主线 `lease:` 前缀分隔) + +``` +lease:user-model:{userId}:{modelHash}:5h:{resetMode} +lease:user-model:{userId}:{modelHash}:daily:{resetMode} +lease:user-model:{userId}:{modelHash}:weekly +lease:user-model:{userId}:{modelHash}:monthly +lease:key-model:{keyId}:{modelHash}:... +``` + +- `modelHash` = `sha1(normalizedModel).slice(0,16)`,避免 `/`、`:` 等字符污染 key。原始 model 字符串落表,hash 仅作 Redis key。 +- `lease:user-model:` / `lease:key-model:` 前缀与主线 `lease:user:` / `lease:key:` 完全分隔,互不踩。 +- total 维度不走 lease 窗口(无窗口 TTL),直接查 DB 聚合 `usage_ledger` 比对 `limitTotalUsd`,与主线 `checkTotalCostLimit` 一致。 + +### 5.3 复用与新建 + +- **复用**(`src/lib/rate-limit/lease.ts` 纯函数,无副作用):`calculateLeaseSlice` / `serializeLease` / `deserializeLease` / `isLeaseExpired`。 +- **新建** `src/lib/model-rate-limit/lease.ts`:`ModelLeaseService`,平行于 `LeaseService`,内部走同一"读切片→查 DB→扣减"流程,但 DB 源换成按模型聚合查询。 +- **新建** repository 聚合:`sumUserCostByModelInTimeRange(userId, model, start, end)` / `sumKeyCostByModelInTimeRange(keyId, model, start, end)`,即现有 `sumUserCostInTimeRange` 加 `AND model = ?`。 +- Redis 故障时 fail-open(`MODEL_RATE_LIMIT_FAIL_OPEN`,与主线一致)。 + +--- + +## 6. 
模块文件清单与职责 + +``` +src/lib/model-rate-limit/ +├── keys.ts # Redis lease key 构造 + modelHash +├── lease.ts # ModelLeaseService:复刻 LeaseService,复用 rate-limit/lease.ts 纯函数 +├── service.ts # ModelRateLimitService:checkCostLimitsWithLease() / decrementLease() +├── resolver.ts # 限额查找:4 级冲突解析 +├── register.ts # registerModelRateLimitExtension() → 调 registerExtensionStep() +└── types.ts # 限额 DTO + flag 读取(isModelRateLimitEnabled) + +src/app/v1/_lib/proxy/ +└── model-rate-limit-guard.ts # execute(session): Promise<Response | null>,越界 throw RateLimitError + +src/repository/ +├── model-limit.ts # CRUD:findByUser / findByKey / upsert / delete +└── (聚合查询) # sumUserCostByModelInTimeRange / sumKeyCostByModelInTimeRange + # 就近放在现有 sum*CostInTimeRange 所在文件,加 model 过滤 + +src/actions/ +└── model-limit.ts # "use server" — ActionResult 包装 + +src/app/api/v1/resources/model-limits/ +├── handlers.ts # listForUser / listForKey / upsert / delete +└── router.ts # Zod-OpenAPI Hono router;挂载到 /api/v1/resources/model-limits + +src/app/[locale]/dashboard/quotas/model-limits/ +├── page.tsx # 列表 + 筛选(用户/Key) +└── _components/ + ├── ModelLimitTable.tsx + ├── EditModelLimitDialog.tsx + └── BulkImportDialog.tsx # 可选:CSV 批量导入 + +messages/<locale>/quota.json +└── 追加 "modelLimits": { ... } # 5 个语言文件同步 + +tests/unit/proxy/model-rate-limit-guard.test.ts +tests/unit/lib/model-rate-limit/service.test.ts +tests/unit/repository/model-limit.test.ts +tests/integration/model-rate-limit.test.ts +``` + +--- + +## 7. 
Guard 实现要点 + +### 7.1 Guard(修正签名:`execute` 返回 `Response | null`,越界 throw) + +```ts +// model-rate-limit-guard.ts +import type { GuardStep } from "./guard-pipeline"; +import type { ProxySession } from "./session"; + +export const ModelRateLimitGuard: GuardStep = { + name: "modelRateLimit", + async execute(session: ProxySession): Promise<Response | null> { + if (!isModelRateLimitEnabled()) return null; + const user = session.authState?.user; + const key = session.authState?.key; + const model = session.getCurrentModel(); // 修正:非 getModel;按实际服务模型计量 + if (!user || !model) return null; + + const limit = await resolveModelLimit({ userId: user.id, keyId: key?.id, model }); + if (!limit) return null; // 4 级查找均未命中 → 不限制(向后兼容) + + // 越界时内部 throw RateLimitError(前缀 MODEL_*),由 pipeline 冒泡至 proxy-handler + await ModelRateLimitService.checkCostLimitsWithLease(limit, model); + return null; + }, +}; +``` + +**注意**: +- 与主线一致用 **lease** 模式(`checkCostLimitsWithLease`),避免"先 check 后 record"的并发越界。 +- 违规通过 `throw RateLimitError` 传播——这与主线 `rateLimit` 步骤一致(pipeline `run()` 不 catch,交 proxy-handler 统一处理)。`execute` 正常路径返回 `null`。 + +### 7.2 成本回填挂钩(修正:这是真实主线 diff,原 §14 漏算) + +回填点在 `response-handler.ts:3970` 现有 `decrementLeaseBudget` 的 `Promise.all` 数组。在其中**追加 model 维度扣减**(fire-and-forget,与现有最终一致性级别相同): + +```ts +// response-handler.ts ~3970,在现有 Promise.all([...]) 数组内追加(flag 守卫) +...(isModelRateLimitEnabled() && model + ? [ + ModelRateLimitService.decrementLease(user.id, "user", model, costFloat), + ...(key ? [ModelRateLimitService.decrementLease(key.id, "key", model, costFloat)] : []), + ] + : []), +``` + +- `model` 取该处已可得的实际服务模型(与 `usage_ledger.model` 落库口径一致)。 +- `ENABLE_MODEL_RATE_LIMIT=false` 时数组为空 → **零行为变化**。 +- DB 权威源始终是 `usage_ledger`(已含 model 列),即便 Redis 扣减失败,下次检查会从 DB 聚合重建切片,最终一致。 + +--- + +## 8. 
配置与开关 + +`.env.example` 追加(不改老变量): + +```bash +# Per-model rate limit +ENABLE_MODEL_RATE_LIMIT=false # 默认关闭;依赖 ENABLE_RATE_LIMIT=true +MODEL_RATE_LIMIT_FAIL_OPEN=true # Redis 故障时 fail-open(与主线一致) +``` + +读取位置:`src/lib/model-rate-limit/types.ts` 的 `isModelRateLimitEnabled()`;guard 入口与回填挂钩均先判 flag。 + +**启动注册(修正:用 `register()` hook)**:`instrumentation.ts:243` 是 `export async function register()`,已有 `globalThis.__CCH_*` 幂等守卫模式。在其内部触发,比顶层副作用 import 更符合现有约定: + +```ts +// instrumentation.ts register() 内 +const { registerModelRateLimitExtension } = await import("@/lib/model-rate-limit/register"); +registerModelRateLimitExtension(); // 内部调 registerExtensionStep({ key, step, insertAfter: "rateLimit" }) +``` + +--- + +## 9. Admin REST API + +新路由(不动现有路由文件): + +``` +GET /api/v1/resources/model-limits/users/:userId +POST /api/v1/resources/model-limits/users/:userId # upsert (model in body) +DELETE /api/v1/resources/model-limits/users/:userId/:model + +GET /api/v1/resources/model-limits/keys/:keyId +POST /api/v1/resources/model-limits/keys/:keyId +DELETE /api/v1/resources/model-limits/keys/:keyId/:model +``` + +- 复用现有 admin auth 中间件;遵循 `src/app/api/v1/resources/<name>/{router.ts, handlers.ts}` 结构(参考 `resources/keys/`) +- 用 zod-openapi 描述请求/响应 schema,跑 `bun run openapi:generate` + `bun run openapi:lint` 通过 +- **挂载点(精确)**:`src/app/api/v1/_root/app.ts`,按现有 `app.route("/", keysRouter)`(`app.ts:155`)模式追加一行 `app.route("/", modelLimitsRouter)` + +--- + +## 10. Dashboard UI + +新路由:`/dashboard/quotas/model-limits`,从 quotas 主导航增加入口。 + +- **列表视图**:表格列 = `Scope | Subject | Model | 5h | Daily | Weekly | Monthly | Total | Reset Mode | Updated` +- **筛选**:按用户/Key、按模型名搜索 +- **编辑**:弹窗表单,与现有 user-quota 编辑形态一致;调用新 Server Action +- **批量导入**:第二阶段再做,先留按钮 disabled + +i18n 键示例(`messages/<locale>/quota.json` 追加): + +```json +"modelLimits": { + "title": "按模型限额", + "scope": { "user": "用户", "key": "Key" }, + "table": { "model": "模型", "fiveHour": "5 小时", ... 
}, + "dialog": { "addModel": "新增模型限额", "wildcardHint": "* 表示兜底" } +} +``` + +5 语言文件 (`zh-CN`, `zh-TW`, `en`, `ja`, `ru`) 同步;提交前跑: +- `bun run i18n:audit-placeholders:fail` +- `bun run i18n:audit-messages-no-emoji:fail` + +--- + +## 11. 测试策略 + +| 层级 | 文件 | 关键断言 | +|---|---|---| +| Unit / Service | `tests/unit/lib/model-rate-limit/service.test.ts` | 5 个周期分别越界抛 `RateLimitError`;fail-open 行为 | +| Unit / Resolver | `tests/unit/lib/model-rate-limit/resolver.test.ts` | 4 级冲突解析顺序正确,通配符 `*` 命中 | +| Unit / Guard | `tests/unit/proxy/model-rate-limit-guard.test.ts` | flag 关闭时直接通过;session 缺 user/model 时不报错 | +| Unit / Repository | `tests/unit/repository/model-limit.test.ts` | upsert 唯一索引、cascade 删除 | +| Integration | `tests/integration/model-rate-limit.test.ts` | 真实 Redis + PG 跑完一轮 lease/record/越界 | +| Security | 复用现有 auth rate-limit 套路,校验越权访问 admin API | + +目标覆盖 ≥ 80%(与 CLAUDE.md Critical Rule 2 对齐),跑 `bun run test:coverage` 验证。 + +--- + +## 12. 分阶段实施清单 + +### Phase 1 — Schema & Service(无 UI 价值,先打地基) +- [ ] schema.ts 追加 `userModelLimits` / `keyModelLimits` +- [ ] `bun run db:generate` → review → `bun run db:migrate` → `bun run validate:migrations` +- [ ] repository 聚合查询 `sumUserCostByModelInTimeRange` / `sumKeyCostByModelInTimeRange`(加 model 过滤)+ 单测 +- [ ] `src/repository/model-limit.ts`(CRUD + transformer)+ 单测 +- [ ] `src/lib/model-rate-limit/` 全套(keys/lease/resolver/service/types,复用 `rate-limit/lease.ts` 纯函数)+ 单测 +- [ ] **不接入 pipeline**,仅暴露 service API + +### Phase 2 — Pipeline 接入 + 回填挂钩 +- [ ] `guard-pipeline.ts` 增加 `registerExtensionStep()` 钩子 + `build()` splice 注入(方案 A) +- [ ] `model-rate-limit-guard.ts` 实现(`execute` 签名,越界 throw) +- [ ] `register.ts` 在 `instrumentation.ts` 的 `register()` 内 import 触发注册 +- [ ] **`response-handler.ts:3970` 追加 model 维度 `decrementLease`(flag 守卫)** +- [ ] `ENABLE_MODEL_RATE_LIMIT` 默认 false,灰度开启 +- [ ] 一行单测固化扩展钩子向后兼容(防主线重构回归) +- [ ] 集成测试通过(lease 检查 + 回填 + 越界一轮) + +### Phase 3 — Admin API +- [ ] 
`/api/v1/resources/model-limits/{handlers,router}.ts` +- [ ] 挂载到主 OpenAPI 注册点 +- [ ] `bun run openapi:check` + `bun run openapi:lint` 通过 +- [ ] `bun run test:v1` 覆盖新端点 + +### Phase 4 — Dashboard UI +- [ ] `/dashboard/quotas/model-limits/page.tsx` + 组件 +- [ ] 5 语言 i18n 落地,过 i18n audit +- [ ] Server Action 包装 + ActionResult 错误展示 + +### Phase 5 — 文档与上线 +- [ ] `docs/api/` 补充新端点说明 +- [ ] CHANGELOG 增加条目 +- [ ] PR 目标分支 `dev`,按 CONTRIBUTING.md 走 squash-merge + +--- + +## 13. 风险与缓解 + +| 风险 | 缓解 | +|---|---| +| Redis 写放大(每请求多写一组 key) | Phase 1 加 micro-benchmark;超阈值时改异步合并写入 | +| 与主线 limit 同时违规导致错误消息歧义 | Error code 加 `MODEL_*` 前缀;UI 区分展示 | +| 通配符限额与具体限额叠加语义混乱 | 解析器**只取最具体的一条**(4 级顺序短路) | +| 主线后续重构 `guard-pipeline.ts` | 方案 A 的扩展钩子是公开 API,重构时需保持向后兼容;写一行单测固化 | +| 老用户没有 model 限额配置 | 解析器返回 null → 完全跳过,行为与现在一致,向后兼容 | +| 上游同步主线变更冲突 | 所有新增文件路径独立;diff 仅集中在 schema.ts 追加 + guard-pipeline.ts 钩子 + response-handler.ts 回填挂钩 + instrumentation.ts 注册 + OpenAPI 注册一行(明细见 §14) | + +--- + +## 14. 主线最小 diff 估算 + +| 文件 | 修改行数 | 类型 | +|---|---|---| +| `src/drizzle/schema.ts` | +50 | 追加表定义 | +| `src/app/v1/_lib/proxy/guard-pipeline.ts` | +25 | 扩展钩子 + `build()` splice(方案 A) | +| `src/app/v1/_lib/proxy/response-handler.ts` | **+8** | **model 维度 lease 扣减(flag 守卫)—— 原 §14 漏算** | +| `src/instrumentation.ts` | +3 | `register()` 内触发扩展注册 | +| `src/app/api/v1/_root/app.ts` | +2 | 挂新路由 `app.route("/", modelLimitsRouter)` | +| repository 聚合查询所在文件 | +追加函数 | `sum*CostByModelInTimeRange`(加 model 过滤,不改老查询) | +| `messages/<locale>/quota.json` × 5 | +追加 sub-tree | 不改老 key | +| `.env.example` | +2 | 追加新变量 | + +**合计主线侵入约 90 行追加、0 行删除/改写**(其中 `response-handler.ts` +8 是诚实计入的回填挂钩,flag 关闭时零行为变化)。其余约 90% 工作量落在新建独立文件。 + +--- + +## 15. 验收标准 + +- [ ] `ENABLE_MODEL_RATE_LIMIT=false` 时整套行为与主线完全一致(回归测试通过) +- [ ] `ENABLE_MODEL_RATE_LIMIT=true` 且未配置任何 model 限额时,请求路径无可观察差异 +- [ ] 配置后命中 5h/daily/weekly/monthly/total 越界均返回正确错误码与 i18n 消息 +- [ ] 单测覆盖率 ≥ 80%,`bun run test:coverage` 通过 +- [ ] OpenAPI lint、i18n audit、typecheck、build 全绿 +- [ ] Dashboard 可视化创建、编辑、删除一条 user/key × model 限额 + +--- + +## 16. 
决策记录(已拍板) + +1. **方案 A vs B** → **采用方案 A**(`guard-pipeline.ts` 扩展钩子 + `build()` splice,约 +25 行)。方案 B 否决(见 §3 备查)。 +2. **模型计量口径** → 按 **`getCurrentModel()`**(实际服务模型,对齐 `usage_ledger.model`)。 +3. **计数机制** → **复刻 lease 模式**(DB 权威 + Redis 切片 + 原子扣减),不用独立计数器。 +4. **RPM / 并发** → **第一版不纳入**(热路径写放大成本高),放 v2。 +5. **通配符语义** → 第一版仅支持精确匹配 + 全兜底 `*`,前缀匹配(`claude-opus-*`)放 v2。 +6. **历史 usage 回填** → **不回放**,新限额自激活时刻起计(lease 首次检查会从 `usage_ledger` 聚合当前窗口用量,行为与现有 weekly/monthly 一致)。 +7. **Provider 维度**(`provider × model`)→ 放 v2。 diff --git a/docs/limit/group-rate-limit.md b/docs/limit/group-rate-limit.md new file mode 100644 index 000000000..b3f75a945 --- /dev/null +++ b/docs/limit/group-rate-limit.md @@ -0,0 +1,834 @@ +# 用户组 × 模型组 限额 — 设计方案(修订版) + +> 目标:重构现有「按模型限额」模块,引入**模型组**与**用户组**两维度、**取最大值**的合并语义,以及**临时提额**能力;命中模型维度限额时该轴消费**完全切分**——既跳过[用户管理]页面的全局成本检查,也**不计入**该轴主线全局额(通过 `usage_ledger` 按轴打标实现,见 §5.3/§16.1)。 +> +> **改动边界(重要)**:分支上已提交的「按模型限额」Phase 1-5 **尚未上线**,可自由重构——`user_model_limits` / `key_model_limits` 两表及其 CRUD/API/UI 均可推倒重来,无需兼容。「不破坏原有逻辑」专指 **`origin/main` 主线**(代理流程、全局限额 `ProxyRateLimitGuard`、guard pipeline、`usage_ledger` 等)。`ENABLE_MODEL_RATE_LIMIT=false` 时整体行为与 main 完全一致。 +> +> **合并策略(实现前必读)**:上游同步与落地合并见 [`../merge/group-rate-limit-merge-plan.md`](../merge/group-rate-limit-merge-plan.md)(开发期持续吸收 upstream)与 [`../merge/group-rate-limit-landing-plan.md`](../merge/group-rate-limit-landing-plan.md)(特性合入与落地后姿态)。**主线 inline 改动须按 seam 化实现**(§5.2.4 的 `backfill.ts`、§8、§14),把 response-handler 等热文件的冲突面降到最小。 + +--- + +## 1. 需求与已决策项 + +### 1.1 需求 + +1. 「按模型限额」新增**模型组**与**用户组**两维度。 +2. 用户组由 `users.tags` 分类;限额主体可为**单用户**、**用户组**,并保留 **Key**。 +3. 模型组由管理员按模型名归集;限额目标为**模型组**(单模型 = 单元素模型组)。 +4. 命中模型维度限额时,遵循模型维度限额,[用户管理]页面全局限额对该请求不再生效。 +5. 
用户可申请**临时提额**(提额度 + 有效期)。 + +### 1.2 已拍板决策 + +| 编号 | 决策 | 结论 | +|---|---|---| +| D1 | 重构自由度 | Phase 1-5 未上线,可推倒重来;只保证不破坏 `origin/main` 主线。 | +| D2 | 主体维度 | **Key + 用户 + 用户组**三类。 | +| D3 | Key 关系 | Key 侧为**独立预算桶,AND 生效**(同主线 Key/User 关系);用户侧按 max 合并。 | +| D4 | 合并语义 | 用户隶属多个用户组、或叠加个人配置时,同一周期档**取最大值(最宽松)**。**不再使用优先级裁决**。 | +| D5 | 用户组限额口径 | **按成员的人均上限**(非全组共享预算)——对该用户**自身**在该模型组上的消费计量。这使「取最大值」在同一口径下成立。 | +| D6 | 模型组分区 | **全局互斥**:一个模型在全系统只属一个模型组(DB 唯一约束);单模型限额 = 单元素组。 | +| D7 | 目标解析 | 由于 D6,目标维度坍缩为「查该模型唯一所属的组」,**无目标优先级**。 | +| D8 | 旁路语义 | **按轴旁路**:用户侧命中则旁路主线用户级全局成本限额;Key 侧命中则旁路主线 Key 级全局成本限额(见 §5)。RPM/并发**不旁路**。 | +| D9 | 回退 | 模型不属任何组,或某轴无任何配置 → 该轴遵循[用户管理]页配置(主线)。 | +| D10 | 提额 | **additive,作用于指定单个周期**。建模为**独立授予账本** `quota_boost_grants`(非限额行内联列);同一 (用户,组,窗口) **可多条不同有效期并自然叠加**;有效期内该档 `上限 += Σ提额度`(见 §7)。 | +| D11 | 提额主体 | **仅个人用户**(账本 `userId` 强约束);用户组与 Key **不可**提额。 | +| D12 | 提额过期 | 当前时间超过有效期 `end` 即由定时任务**DELETE 过期授予行**,避免每请求判定(见 §7.1)。撤销提额 = 删行。 | +| D14 | 提额申请流 | **当前系统不实现**用户自助申请/审批工作流;管理员在 Dashboard 直接增删授予行(无 pending/审批状态机)。 | +| D13 | 旁路计入口径(**v1 已采纳完全切分**) | **配置轴完全切分**:命中某轴模型限额时,该请求消费**既跳过该轴主线全局检查、也不计入该轴主线全局额**。实现为 `usage_ledger` **按轴打标**两列 `counted_in_user_global` / `counted_in_key_global`(写入期由 bypass 标记冻结,默认 true),全局聚合按标记过滤(见 §5.3/§16.1)。已否决初版「仅跳过检查、消费仍计入」与读取期 `NOT IN` 排除(理由见 §16.1)。 | + +--- + +## 2. 计量与口径基线(沿用主线,不破坏) + +- DB 权威源:`usage_ledger`(已含 `model` 列)。 +- 计数:复刻主线 lease(DB 权威 + Redis 切片 + 原子 Lua 扣减),fail-open 由 `MODEL_RATE_LIMIT_FAIL_OPEN` 控制。 +- 模型口径:`session.getCurrentModel()`(重定向后实际服务模型),对齐 `usage_ledger.model`。 +- 周期档:5h / daily / weekly / monthly / total,及 fixed/rolling 模式(沿用 `dailyResetModeEnum`)。RPM/并发 v1 不纳入模型维度。 + +--- + +## 3. 
数据模型(全新设计;旧两表废弃重建) + +### 3.1 `model_groups` — 模型组 + +```ts +export const modelGroups = pgTable("model_groups", { + id: serial("id").primaryKey(), + name: varchar("name", { length: 128 }).notNull(), + description: text("description"), + isSingleton: boolean("is_singleton").notNull().default(false), // 单模型快捷组标记 + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ nameUnique: uniqueIndex("model_groups_name_idx").on(t.name) })); +``` + +### 3.2 `model_group_members` — 模型→组的全局互斥映射(D6) + +```ts +export const modelGroupMembers = pgTable("model_group_members", { + id: serial("id").primaryKey(), + modelGroupId: integer("model_group_id").notNull().references(() => modelGroups.id, { onDelete: "cascade" }), + model: varchar("model", { length: 128 }).notNull(), + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ + modelUnique: uniqueIndex("model_group_members_model_idx").on(t.model), // 一个模型只属一个组(DB 强约束) + byGroup: index("model_group_members_group_idx").on(t.modelGroupId), +})); +``` + +- 请求期反查 `model → groupId` 为单条索引查询(`WHERE model = ?`)。 +- 「单模型限额」= 建一个 `isSingleton=true`、仅含该模型的组(UI 提供快捷入口)。 +- 添加已属其他组的模型 → 唯一约束/应用层校验报错。 + +### 3.3 `user_groups` — 用户组(tag 登记,无 priority) + +```ts +export const userGroups = pgTable("user_groups", { + id: serial("id").primaryKey(), + tag: varchar("tag", { length: 255 }).notNull(), // 映射 users.tags 中某 tag + name: varchar("name", { length: 128 }), + description: text("description"), + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ tagUnique: uniqueIndex("user_groups_tag_idx").on(t.tag) })); +``` + +- 成员资格由 `users.tags @> [tag]` 派生(复用 GIN 索引 `idx_users_tags_gin`),**无独立成员表**。 + +### 3.4 `model_group_limits` — 统一限额表(仅基准五档) + +```ts +export 
const limitSubjectEnum = pgEnum("limit_subject", ["user", "key", "user_group"]); +export const boostWindowEnum = pgEnum("boost_window", ["5h", "daily", "weekly", "monthly", "total"]); + +export const modelGroupLimits = pgTable("model_group_limits", { + id: serial("id").primaryKey(), + subjectType: limitSubjectEnum("subject_type").notNull(), + subjectId: integer("subject_id").notNull(), // userId | keyId | userGroupId + modelGroupId: integer("model_group_id").notNull().references(() => modelGroups.id, { onDelete: "cascade" }), + // —— 五档基准限额 —— + rpmLimit: integer("rpm_limit"), // 预留,v1 不强制 + limit5hUsd: numeric("limit_5h_usd", { precision: 10, scale: 2 }), + limit5hResetMode: dailyResetModeEnum("limit_5h_reset_mode").default("fixed").notNull(), + dailyLimitUsd: numeric("daily_limit_usd", { precision: 10, scale: 2 }), + limitWeeklyUsd: numeric("limit_weekly_usd", { precision: 10, scale: 2 }), + limitMonthlyUsd: numeric("limit_monthly_usd", { precision: 10, scale: 2 }), + limitTotalUsd: numeric("limit_total_usd", { precision: 10, scale: 2 }), + limit5hCostResetAt: timestamp("limit_5h_cost_reset_at", { withTimezone: true }), + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ + uniq: uniqueIndex("model_group_limits_uniq_idx").on(t.subjectType, t.subjectId, t.modelGroupId), + bySubject: index("model_group_limits_subject_idx").on(t.subjectType, t.subjectId), + byGroup: index("model_group_limits_group_idx").on(t.modelGroupId), +})); +``` + +- 限额行只存基准五档;临时提额拆到独立账本 `quota_boost_grants`(§3.5,D10),不再内联在此表。 +- 一条 (subject, group) 唯一。 + +### 3.5 `quota_boost_grants` — 临时提额授予账本(D10/D11/D12/D14) + +```ts +export const quotaBoostGrants = pgTable("quota_boost_grants", { + id: serial("id").primaryKey(), + userId: integer("user_id").notNull().references(() => users.id, { onDelete: "cascade" }), // D11 仅个人用户 + modelGroupId: 
integer("model_group_id").notNull().references(() => modelGroups.id, { onDelete: "cascade" }), + window: boostWindowEnum("window").notNull(), // 作用单档(D10) + amountUsd: numeric("amount_usd", { precision: 10, scale: 2 }).notNull(), + validPeriod: tstzrange("valid_period").notNull(), // 有效期 [from, end) + note: text("note"), // 管理员备注(可选) + createdBy: integer("created_by"), // 操作管理员(可选审计) + createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(), + updatedAt: timestamp("updated_at", { withTimezone: true }).defaultNow().notNull(), +}, (t) => ({ + byTarget: index("quota_boost_grants_target_idx").on(t.userId, t.modelGroupId, t.window), + periodGist: index("quota_boost_grants_period_gist").using("gist", t.validPeriod), +})); +``` + +- **无 `status` 字段、无状态机(D14)**:管理员写入即生效(到 `validPeriod` 起点起算),撤销 = 删行。 +- 同一 (userId, modelGroupId, window) 允许**多条不同有效期并存**,解析时叠加(§4.4)。 +- `tstzrange` 需 `customType` 或 drizzle range 支持;不支持则退化为 `validFrom` / `validTo` 两列(过期索引改建在 `validTo`)。 + +### 3.6 `usage_ledger` 按轴打标列(完全切分基石,D13) + +完全切分要求全局额聚合能区分「计入全局 / 模型组单算」的消费。为避免读取期 `NOT IN` 的追溯性与漂移(§16.1),在主线 `usage_ledger` 增两列,**写入期冻结归属**: + +```ts +// usage_ledger 追加(主线表变更) +countedInUserGlobal: boolean("counted_in_user_global").notNull().default(true), +countedInKeyGlobal: boolean("counted_in_key_global").notNull().default(true), +``` + +- **取值(落账时写入)**:`counted_in_user_global = !session.bypassUserGlobalCost`,`counted_in_key_global = !session.bypassKeyGlobalCost`(在 `updateRequestCostFromUsage` 落账处与 `costUsd` 一并写入;bypass 标记在 guard 阶段已就绪,见 §5.2)。 +- **默认 `true`** → 历史行、flag 关闭、模型无组、fail-open(不置 bypass,§5.2)全部计入全局,保证「flag off 与 main 逐字节一致」与历史行无须回填。 +- **单一事实源**:同一对标记同时驱动「DB 全局聚合过滤」「Redis 全局桶扣减/计数跳过」「展示分栏」三处(§5.3/§6/§10),口径恒一致,无漂移、无 `NOT IN`、无追溯重分类。 +- 索引:全局聚合在现有 `(userId, createdAt)` / `(key, createdAt)` 上追加 `counted_in_*_global = true` 残余过滤;绝大多数行为 true,无须独立索引。 + +### 3.7 迁移 + +旧 `user_model_limits` / `key_model_limits` 未上线,直接在 `schema.ts` 删除并以上述表替代。**`usage_ledger` 
两列为主线表变更**,与新五表一并 `bun run db:generate` → review → `db:migrate` → `validate:migrations`。 + +> **合并友好(实现约定)**:本特性对 `schema.ts` 的改动(新五表 + `usage_ledger` 2 列 + `system_config` 5 列)**集中放在文件末尾、用 region 注释包裹**,降低与 upstream schema 编辑的文本冲突;生成的迁移应为**最高编号**。每次同步 upstream 后**不手工 merge 迁移生成物(journal/snapshot),而是丢弃后 `db:generate` 重生成**(见合并计划 §5)。 + +--- + +## 4. 解析算法 + +请求上下文:`keyId`、`userId`、`model`、`tags = user.tags`、`now`。 + +### 4.1 目标解析(D7) + +``` +G = SELECT model_group_id FROM model_group_members WHERE model = :model +若 G 为空 → 该模型不属任何组 → enforced=[],两轴均回退主线(D9)。 +``` + +> **热路径缓存(强约束,v1 必做)**:`model→groupId` 反查**不得**每请求打 DB。`model_groups` / `model_group_members` / `user_groups` 均为管理员低频变更、每请求高频读,必须走**进程内短 TTL 缓存快照**——采用 `provider-cache.ts` 同款 **L1 + Redis pub/sub 失效** 模式(见 `src/lib/cache/provider-cache.ts`、`publishCacheInvalidation` / `subscribeCacheInvalidation`),写操作(增删组/成员/tag 登记)本地失效**并广播**,保证多 Pod 集群近即时一致。**不要**用 `system-settings-cache.ts` 的纯进程内单点失效——那只清本进程、是跨 Pod 陈旧的根因。叠加 stale-while-revalidate(见 §17.3 OPT-C)。否则 flag 开启后,**即使全系统零模型组**,每请求仍会平白多一次同步 DB 往返横在转发前,并在高并发下放大 PG QPS、吃满连接池——这是企业级网关真正会爆的点。详见 §4.7。 + +### 4.2 Key 侧(独立桶,AND) + +``` +keyRow = model_group_limits WHERE subject=(key, keyId) AND modelGroupId=G +keySide = keyRow ? 
{ bucket:(key,keyId,G), caps: effCaps(keyRow, now) } : null +``` + +### 4.3 用户侧(max 合并,人均口径 D5) + +``` +sources = [] +indivRow = limits WHERE subject=(user, userId) AND group=G; if indivRow: sources += indivRow +for ug in userGroups WHERE tag IN user.tags: + ugRow = limits WHERE subject=(user_group, ug.id) AND group=G; if ugRow: sources += ugRow +若 sources 为空 → userSide=null(用户侧回退主线,D9) +否则 userSide = { + bucket: (user, userId, G), // 始终按该用户自身消费计量(人均口径) + caps[w] = MAX over sources of effCap(src, w, now) // 逐档取最大 +} +``` + +### 4.4 提额生效(D10/D11/D12;F1 虚拟个人 source;F2 缓存读取) + +提额来自 `quota_boost_grants`(仅个人用户,D11),通过抬高「个人 source」的上限生效。逐档 `w` 计算: + +``` +boostSum(userId, G, w, now) = + Σ amountUsd over 满足 user_id=userId AND model_group_id=G AND window=w + AND valid_period @> now 的授予 // 多条自然叠加 + +groupMax[w] = MAX over 用户组 sources of cap[w] // 无用户组 source → 不存在 + +# 个人 source 基线(F1:无个人行时合成虚拟个人 source) +personalBase[w] = + 存在个人限额行 → 个人行 cap[w] + 否则存在任一用户组 source → groupMax[w] // 虚拟个人 source + 否则(无任何 source) → 缺省(提额惰性,见下) + +personalEff[w] = personalBase 缺省 ? 缺省 + : (personalBase[w]==null ? 
null : personalBase[w] + boostSum) + +# 回到 §4.3: +caps[w] = MAX(groupMax[w], personalEff[w]) // 仅对存在项取 max +``` + +- **F1 合成虚拟个人 source(已采纳方案 B)**:用户**无个人限额行、仅命中用户组限额**时,提额仍以「该用户在该组的用户组上限 `groupMax`」为基线叠加生效,避免「授予了却不生效」。`boostSum=0` 时虚拟 source = `groupMax`,与无提额行为完全一致(无回归)。 +- **提额惰性边界**:当用户在该组**无任何 source**(个人 + 用户组皆无)时,提额**不**凭空创建模型组限额——否则会把本应「回退主线全局额」(D9)错误收紧成一个 = 提额度的模型桶(方向相反)。此时 `userSide=null`,照常回退主线。 +- **F2 缓存读取(已采纳短 TTL)**:活跃授予随限额行一并进入 §4.7 解析快照(短 TTL + 写操作失效);请求期对快照内授予做 **in-memory `valid_period @> now`** 判定。因此: + - **时间窗激活/失效是 in-memory 精确判定、零延迟**(含「预排未来生效」的授予,到点即生效); + - **仅管理员增删授予行**有 ≤TTL 传播延迟(写时失效则即时); + - §7.1 的过期 DELETE 仅做存储清理,残留过期行也因 `@> now` 兜底不会误生效。 +- 提额抬高的是个人 source 上限,再参与 §4.3 的 max;计量对象仍是该 user 自身(人均口径 D5 不变)。 +- `null`(无限)在 max 中视为 +∞ 取胜;某档全为 null → 该档无限。 + +### 4.5 最终 + +``` +enforced = [keySide, userSide].filter(非 null) // 两桶 AND 全部通过 +``` + +### 4.6 示例 + +| 配置 | 请求 | 结果 | +|---|---|---| +| 模型 opus 属组 `g-opus`;user 5 个人 (user,g-opus) 日额 \$10;tag team-a (user_group,g-opus) 日额 \$30 | opus | userSide 日额 = max(10,30)=\$30,按 user 5 自身消费计量;旁路用户级全局额 | +| 同上,给 **user 5 个人** 一条提额授予:window=daily, +\$50, 有效期内(D11 仅个人用户)| opus(期内) | 个人 source 日额 = 10+50=\$60;userSide = max(60, 30)=\$60 | +| 同上,再给 user 5 追加第二条提额:window=daily, +\$20, 有效期重叠 | opus(两条均有效)| 个人 source 日额 = 10+50+20=\$80(多条叠加);userSide = max(80,30)=\$80 | +| key 99 设 (key,g-opus) 日额 \$5;user 无配置 | opus | keySide=\$5(旁路 Key 级全局额);userSide=null(用户级走主线全局额) | +| 模型 sonnet 不属任何组 | sonnet | enforced=[],完全走主线全局额 | + +### 4.7 热路径与缓存(性能基线) + +模型 guard 插在 `rateLimit` **之前**,其全部解析与检查都横在 `forwardStartTime` 之前,直接计入转发前延迟。设计两条硬约束: + +1. 
**解析走缓存快照,零热路径 DB 往返**:解析所需的全部数据均从进程内短 TTL 快照读取(§4.1 注 + F2),快照组成(F4): + - `model→groupId` 反查表、各组成员列表(`model_group_members`); + - `tag→user_group` 登记、各 `(subjectType, subjectId, modelGroupId)` 限额行(`model_group_limits`); + - 活跃/未来提额授予行(`quota_boost_grants`,按 `(userId, modelGroupId, window)` 索引)——请求期在内存做 `valid_period @> now` 判定(§4.4 F2)。 + 写操作(增删组/成员/tag/限额/提额)触发对应失效;目标:flag 开启且**任意配置组合**下,解析阶段**新增 0 次 DB 往返**(含提额,不再有 GiST 往返)。 +2. **lease 检查并行,不串行**:同一桶 5 档、以及 keySide / userSide 两桶之间用 `Promise.all` 并发(注意:现有 `src/lib/model-rate-limit/service.ts` 是串行 `for...await`,重写时改并行)。「允许通过」路径上所有档位反正都要查一遍,无 early-exit 红利;违规时在已返回结果里按固定优先级裁决错误码即可,不影响语义。 + +> **每请求新增 I/O 预估**(达成上述约束后): +> - flag off:0(与 main 逐字节一致)。 +> - flag on、模型无组:0 DB(快照命中)+ 0 Redis。 +> - flag on、单轴命中:0 DB + ≤5 Redis(并行);同时主线对应轴成本档被旁路(§5),净 Redis 大致持平。 +> - flag on、双轴命中:0 DB + ≤10 Redis(并行);主线 `User-*` / `Key-*` 成本档均旁路。 + +### 4.8 端到端场景(审查辅助) + +统一背景:用户管理页给 **User U5 全局日额 = \$10**;`opus ∈ g-opus`,`sonnet` 不属任何组。详尽走查见评审报告 `group-rate-limit-review.html` §5。 + +| 案例 | 配置 | 序列 / 请求 | 结果与启示 | +|---|---|---|---| +| **A 完全切分对比** | `(user,g-opus)` 日 \$30 | opus×8@\$3 后发 sonnet \$5 | opus \$24 计模型桶但 `counted_user=false`,**不污染全局**;全局已用仅 \$5 → sonnet 放行。旧「仍计入」会让全局 = \$24 → 误杀 sonnet。 | +| **B 非对称轴** | 仅 `(user,g-opus)` \$30;Key K 全局日 \$8;无 `(key,g-opus)` | opus×3@\$3 | user 轴切分(`counted_user=false`),key 轴回退主线(`counted_key=true`)→ 第 3 条被 **Key 全局 \$8** 拦截(user-mg \$9 本可放行)。 | +| **C 人均口径** | `team-a (user_group,g-opus)` 日 \$30;U5 另有个人 \$10;U7 无个人行 | U5/U7 各发 opus | U5 = max(10,30)=\$30、U7=\$30,**各自独立桶**(10 人 = 10×\$30,非共享)。 | +| **D 提额 F1** | 接 C,给 U7(仅组限额)授予 daily +\$50 | opus(期内) | `personalBase=groupMax \$30`(虚拟 source)→ `personalEff=80` → cap = max(30,80)=**\$80**;不做 F1 则提额被丢弃。 | +| **E 提额惰性** | U9 在 g-opus 无任何配置,误授予 +\$50 | opus | 无 source → 提额惰性、不凭空建限额 → `userSide=null` 回退主线(no-op)。 | +| **F fail-open** | `(user,g-opus)` \$30;Redis 故障 | opus | 模型档 fail-open → **不置 bypass** → `counted_user=true`、主线全局档照常执行(不双重放行)。 | + +--- + 
+## 5. 覆盖语义与 Guard Pipeline(D8,按轴切分) + +### 5.1 规则(完全切分,D13) + +- **用户侧命中**(userSide≠null)→ ① 跳过主线 **User 级**成本检查、改由 userSide 桶裁决;② 该请求消费**不计入** User 主线全局额(`counted_in_user_global=false`)。 +- **Key 侧命中**(keySide≠null)→ ① 跳过主线 **Key 级**成本检查、改由 keySide 桶裁决;② 该请求消费**不计入** Key 主线全局额(`counted_in_key_global=false`)。 +- 某轴未命中 → 该轴照常走主线全局成本限额且**正常计入**(D9 回退,标记保持默认 true)。 +- **RPM 与并发 Session 始终生效**(资源护栏,不切分)。 + +> 完全切分(而非「仅跳过检查」)的理由:若命中轴的消费仍计入全局桶,则模型限额的额度只有在「全局额 ≥ 模型消费」时才用得满,且分组消费会污染全局桶、拖累该用户/Key 的**未分组**流量——违背「配置了模型限额则全局不生效」的预期。切分后命中轴与全局额成为**独立预算**:配置轴只受模型桶治理,未配置轴照常受全局额护栏(D9)。 +> +> 按轴切分的理由:Key 与 User 在主线本就是两个独立 AND 预算桶。模型维度沿用此结构 + 双标记列,能让 D9「用户侧无配置 → 遵循[用户管理]」与 D3「Key 独立 AND」同时成立,且降级平滑。 + +### 5.2 接入实现 + +1. 扩展钩子 `registerExtensionStep` 增加可选 `insertBefore`(与现有 `insertAfter` 并存);`modelRateLimit` 改 splice 在 `rateLimit` **之前**。 +2. `model-rate-limit-guard` 解析 §4 得 `enforced`;逐桶 `checkCostLimitsWithLease`(越界 throw `RateLimitError`,`MODEL_*` 码);**仅在该桶检查真实执行(非 fail-open)且通过时**才置对应 bypass 标记: + - `session.bypassUserGlobalCost = userSide 检查已执行且通过` + - `session.bypassKeyGlobalCost = keySide 检查已执行且通过` + - `session.setResolvedModelLimits(enforced)`(供回填扣减同组桶) + + > **CRITICAL — fail-open 不得置 bypass(防双重放行)**:模型 guard 在 Redis 故障时按 `MODEL_RATE_LIMIT_FAIL_OPEN` 放行(`service.ts` 已 fail-open)。若此时仍置 bypass 标记,结果是「模型档没拦(fail-open)+ 主线成本档被旁路(bypass)= 该请求成本闸门全开」。因此 fail-open(`result.failOpen === true`)的桶**必须保留对应主线全局额护栏**,即不置该轴 bypass,让主线 `ProxyRateLimitGuard` 继续兜底。该不变量写进回归测试(§11、§15)。 +3. 主线 `ProxyRateLimitGuard.ensure()`(`rate-limit-guard.ts`)做受标记守卫的最小改动: + - 每个 **User-*** 成本档检查包进 `if (!session.bypassUserGlobalCost)`; + - 每个 **Key-*** 成本档检查包进 `if (!session.bypassKeyGlobalCost)`; + - RPM / 并发块**不加守卫**。 + - flag 关闭时两标记恒 false → 零行为变化。 +4. 
**落账打标 + 回填跳过**(完全切分关键;**seam 化实现**,见合并计划 §3.1):可外移逻辑放入新文件 `src/lib/model-rate-limit/backfill.ts`,response-handler(最高频文件)只留极小调用点: + - `backfill.ts` 暴露:`resolveCountedFlags(session)` → `{countedUser, countedKey} = {!bypassUserGlobalCost, !bypassKeyGlobalCost}`;`modelBucketDecrements(session, costFloat)` → 按 `resolvedModelLimits` 生成模型桶 decrement(全新增、可完全外移)。 + - `response-handler` 仅改两处:① `updateRequestCostFromUsage` 落账时用 `resolveCountedFlags` 写两列;② `trackCostToRedis` 的 `decrementLeaseBudget` 数组里,**被旁路轴的 User-_/Key-_ 全局扣减用条件 spread 跳过**(`...(session.bypassUserGlobalCost ? [] : [/* user 4 档 */])`,与既有 `...(flag ? [...] : [])` 写法同构),再 `...modelBucketDecrements(...)` 追加模型桶;provider 桶照常。 + - **CRITICAL — 5h-fixed 计数器也须按轴跳过**:lease 播种里**只有 5h-fixed 窗口**从 Redis 计数器 `{type}:{id}:cost_5h_fixed` 取数(`lease-service.readFixed5hWindowState`),其余档(5h-rolling、daily/weekly/monthly、total)一律从已按 `counted_in_*_global` 过滤的 DB 聚合播种。该计数器由 `trackCost` 写入,且 `trackCost` 在 decrement 数组**之外无条件调用**——因此仅跳过 `decrementLeaseBudget` 数组不足以切分 5h-fixed:被旁路轴的消费仍会 INCRBYFLOAT 进 `cost_5h_fixed`,主线全局 5h-fixed lease 每次刷新都从被污染的计数器重新播种,从而消耗主线全局 5h 额度(违背 §4.8-A)。**实现修正**:`trackCost` 新增 `bypassKeyGlobalCost?` / `bypassUserGlobalCost?` 选项,被旁路轴**跳过其 key/user 5h-fixed 写入**(provider 与其余档不动)。daily/weekly/monthly/total/5h-rolling 因从过滤后 DB 播种,无需跳过 `trackCost`。回归见 §11「完全切分打标」与 `service-extra.test.ts`。 + - **说明**:全局轴「跳过」是 gate 既有主线 decrement 调用,**无法完全外移**,只能在数组处做局部条件 spread(小而幂等);`resolveCountedFlags` 与模型桶 decrement 则完全落在 `backfill.ts`。这与 §3.6 标记、§6 聚合过滤三处同源,保证 Redis 切片与 DB 重播种口径一致、无漂移。 + - flag 关闭时两标记恒 false / 条件 spread 恒取全集 → 全部正常计入,零行为变化。 + +### 5.3 完全切分:检查跳过 + 计量切分(按轴打标,D13) + +- **配置轴 = 检查跳过 + 计量切分**:命中轴既跳过主线成本档 `if` 判定(§5.2.3),又**不计入**该轴主线全局额。 +- `usage_ledger` 仍**无条件记录每条请求的实际成本**(口径不变),但新增两列 `counted_in_*_global` 标注该笔成本是否计入对应轴全局额(§3.6);模型桶 `decrementLease` 照常无条件执行。 +- 主线**全局额聚合**(lease 播种 + total 计数 + 回填扣减)一律按 `counted_in_*_global = true` 过滤(§6),因此被切分轴的消费**不再**进入该轴主线全局桶——配置轴与全局额成为独立预算。 +- **非对称仍正确**:只配 userSide、未配 
keySide 时,`counted_in_user_global=false`(不计 User 全局)但 `counted_in_key_global=true`(仍计 Key 全局,Key 轴照常受全局护栏)。 +- **展示口径(UI 必须明示且分栏,§10)**:因消费按轴切分,用户「计入全局额」的数字会小于其「总消费」。额度卡/my-usage 须拆「计入全局 / 模型组单算」两栏并明示,避免「总花费 ≠ 全局额度判定」的困惑。 + +--- + +## 6. lease 桶与 DB 聚合 + +| 桶 | lease key | DB 权威聚合(`usage_ledger`)| +|---|---|---| +| Key 侧 | `lease:key-mg:{keyId}:{groupId}:{window}[:{mode}]` | `keyId` 且 `model IN (组成员)` | +| 用户侧 | `lease:user-mg:{userId}:{groupId}:{window}[:{mode}]` | `userId` 且 `model IN (组成员)` | + +- **无跨成员聚合**:用户组限额是人均上限(D5),用户侧桶恒按该 user 自身计量;用户组只贡献「cap 值」,不改变计量对象。 +- `total` 档无 lease 窗口,但**复刻主线 `checkTotalCostLimit` 的 Redis 读穿缓存**(`total_cost:model:*`,5min TTL)而非每请求直连 DB(**修正**:主线 total 是 Redis 缓存 + DB 兜底 + 异步写回,非纯 DB 聚合)。详见 §17.1(OPT-A)。 +- 模型桶聚合:`sumScopeCostByModelsInTimeRange(scope, id, models[], start, end)`(`model IN (...)`,沿用现有 `sumUserCostByModelInTimeRange` 写法)。组成员列表来自 `model_group_members`。 + +### 6.1 主线全局额聚合按标记过滤(完全切分,D13) + +主线全局桶的 DB 聚合改为**只统计计入全局的消费**: + +- `sumUserCostInTimeRange` / `sumUserTotalCost`(User 轴)追加 `AND counted_in_user_global = true`;`sumKeyCostInTimeRange` / `sumKeyTotalCost`(Key 轴)追加 `AND counted_in_key_global = true`。 +- **不可原地改这些共享函数**(同时服务展示/告警)。两种落地任选其一,建议前者: + - **加可选参数** `countedInGlobalOnly?: boolean`(默认 false → 与 main 逐字节一致),仅**限额检查**调用方(`lease-service.queryDbUsage`、`checkTotalCostLimit` 的 DB 兜底)传 `true`; + - 或新增 `sum*CostCountedInGlobalInTimeRange` 变体函数。 +- **展示侧**(§10)用同源标记拆栏:`counted_*=true` → 「计入全局」,`counted_*=false` → 「模型组单算」,两者之和 = 总消费。 +- 一致性保证:DB 聚合过滤、Redis 回填跳过(§5.2.4)、展示分栏,三处全部由 `counted_in_*_global` 同一标记驱动 → lease 重播种与运行时扣减口径恒一致。 + +--- + +## 7. 
提额(授予账本,无申请流) + +建模为独立账本 `quota_boost_grants`(§3.5),而非限额行内联列。 + +- **字段**:`userId`(D11 仅个人用户)、`modelGroupId`、`window`(作用单档)、`amountUsd`(提额度)、`validPeriod`(有效期 `[from,end)`)、`note`/`createdBy`(可选审计)。 +- **无申请/审批工作流(D14)**:当前系统**不实现**用户自助申请、审批状态机。管理员在 Dashboard 直接为「某用户 × 某模型组 × 某窗口」**新增/删除授予行**。生效时机(F2 缓存):增删授予**写时失效解析快照 → 近即时生效**(无写时失效则 ≤TTL 传播);而 `validPeriod` 的**时间窗到点生效/失效是 in-memory 精确判定、无延迟**(§4.4 F2)。**撤销提额 = 删行**。 +- **可叠加(D10)**:同一 (userId, modelGroupId, window) 允许多条不同有效期并存,解析时对当前有效者求和(§4.4)。 +- 生效见 §4.4:`valid_period @> now` 的授予,其 `amountUsd` 累加到对应窗口个人 source 上限。 + +### 7.1 过期清理(D12) + +需求:**当前时间超过有效期 `end` 即删除提额,避免每请求都计算时间窗**。 + +- **清理动作**:直接 **DELETE 过期授予行**(基准五档在 `model_group_limits`,不受影响)。 + ```sql + DELETE FROM quota_boost_grants WHERE upper(valid_period) <= now(); + ``` + (退化为两列时 `WHERE valid_to <= now()`。) +- **触发方式**:复用 `instrumentation.ts` 既有定时器模式(如会话缓存 `startCacheCleanup(60)`、云价 `startCloudPriceSyncScheduler` 的 `setInterval`),新增 `startBoostExpiryCleanup()`:`register()` 内带 `__CCH_*` 幂等守卫启动,周期 60s 跑一次上述 DELETE。 +- **与解析的关系**:清理让活跃授予集保持很小,使 §4.4 的取数廉价。两次清理之间残留的过期行,由 §4.4 的 `valid_period @> now` 兜底保证**不会**错误生效(清理是优化,不影响正确性)。 + +--- + +## 8. 
文件清单 + +``` +src/drizzle/schema.ts + - 删除 userModelLimits / keyModelLimits + + modelGroups / modelGroupMembers / userGroups / modelGroupLimits / quotaBoostGrants + 两 enum + + usage_ledger 增 counted_in_user_global / counted_in_key_global 两列(主线表变更,§3.6) + +src/repository/ + model-group.ts # 模型组 + 成员 CRUD;model→groupId 反查;互斥校验 + user-group.ts # 用户组(tag) CRUD;tag↔成员(派生) + model-group-limit.ts # model_group_limits CRUD(按 subject) + quota-boost.ts # quota_boost_grants CRUD;活跃授予查询;过期 DELETE + usage-aggregate.ts # sumScopeCostByModelsInTimeRange(或并入现有聚合文件) + statistics.ts # 全局聚合加 countedInGlobalOnly 过滤(§6.1):sumUser/KeyCostInTimeRange、sumUser/KeyTotalCost + +src/lib/model-rate-limit/ + resolver.ts # §4:目标解析 + keySide + userSide(max) + 提额叠加 → ModelLimit[] + keys.ts # §6 lease key(key-mg / user-mg) + service.ts # checkCostLimitsWithLease(桶) / decrementLease(桶);total 桶走 Redis 读穿缓存(OPT-A §17.1);模型 lease 传 floor / 模型 percent(OPT-B §17.2) + types.ts # ModelLimit(含 bucket + caps) + flag + cache.ts # §4.1/§4.7/§17.3 解析快照(model→group / 组成员 / tag→组限额行);L1 + Redis pub/sub 失效(照 provider-cache.ts)+ stale-while-revalidate(OPT-C)+ 零组短路(OPT-F) + boost-cleanup.ts # §7.1 startBoostExpiryCleanup():60s DELETE 过期授予行 + backfill.ts # §5.2.4 seam:resolveCountedFlags() + modelBucketDecrements()(response-handler 仅留极小调用点) + +src/lib/rate-limit/ + lease-service.ts # queryDbUsage 传 countedInGlobalOnly=true(§6.1) + service.ts # checkTotalCostLimit DB 兜底传 countedInGlobalOnly=true(§6.1) + +src/instrumentation.ts # register() 内启动 startBoostExpiryCleanup()(__CCH_* 幂等守卫) + +src/app/v1/_lib/proxy/ + model-rate-limit-guard.ts # 解析 enforced、逐桶检查、置 bypassUser/KeyGlobalCost(fail-open 不置,§5.2) + rate-limit-guard.ts # User-* / Key-* 成本档按标记守卫;RPM/并发不变 + guard-pipeline.ts # ExtensionStep.insertBefore + session.ts # bypassUserGlobalCost / bypassKeyGlobalCost + resolvedModelLimits[] + response-handler.ts # §5.2.4 仅 2 处极小调用点:resolveCountedFlags 写两列 + decrement 数组条件 spread(逻辑在 backfill.ts) + +src/app/api/v1/resources/ + 
model-groups/{router,handlers}.ts + user-groups/{router,handlers}.ts + model-limits/{router,handlers}.ts # 重写为 model_group_limits(仅基准五档) + quota-boosts/{router,handlers}.ts # 提额授予 列表/新增/删除(无审批) + _root/app.ts # 挂新路由 + +src/actions/{model-group,user-group,model-limit,quota-boost}.ts +src/actions/{my-usage,users,keys,key-quota}.ts # 展示分栏:计入全局 / 模型组单算(§10) +src/app/[locale]/dashboard/quotas/{model-groups,user-groups,model-limits}/ # 三页面(限额页内嵌提额授予列表) +messages/*/quota.json # 5 语言追加/重写 modelLimits 子树(含提额、切分提示、展示分栏) +``` + +--- + +## 9. Admin REST API + +``` +# 模型组 + 成员 +GET/POST/PATCH/DELETE /api/v1/resources/model-groups[/:id] +POST/DELETE /api/v1/resources/model-groups/:id/members # { model } / ?model= + +# 用户组 +GET/POST/PATCH/DELETE /api/v1/resources/user-groups[/:id] + +# 限额(统一表,仅基准五档) +GET /api/v1/resources/model-limits?subjectType=&subjectId= +POST /api/v1/resources/model-limits # { subjectType, subjectId, modelGroupId, ...caps } +DELETE /api/v1/resources/model-limits/:id + +# 提额授予(账本,无审批;仅个人用户 D11) +GET /api/v1/resources/quota-boosts?userId=&modelGroupId= +POST /api/v1/resources/quota-boosts # { userId, modelGroupId, window, amountUsd, validPeriod, note? } +DELETE /api/v1/resources/quota-boosts/:id # 撤销 = 删行 +``` + +复用 admin auth;zod-openapi;`openapi:check && openapi:lint && test:v1` 通过。 + +--- + +## 10. 
Dashboard UI + +- **模型组管理**(新):建组、增删成员(模型多选,复用 `model-combobox`);「单模型」快捷建单元素组;成员互斥冲突提示。 +- **用户组管理**(新):从 `getAllUserTags()` 选 tag 登记为组。 +- **按模型限额**(重写):主体选择器(用户 / 用户组 / Key)+ 模型组选择器 + 五档限额。 +- **提额授予**(限额页内嵌,**仅主体=个人用户时可见**,D11/D14):展示该 (用户×模型组) 的活跃/未来授予列表,支持**新增**(窗口下拉 + 提额度 + 起止时间)与**删除**(=撤销);**无申请/审批入口**,管理员直接操作。 + - **F1 提示**:明示「提额会在该用户的有效上限(个人行或其用户组上限,取较高基线)之上叠加;用户即使只命中用户组限额,提额也生效」。 + - **F2 提示**:明示「`validPeriod` 到点即时生效/失效;新增或删除授予最长 N 秒(缓存 TTL)后对线上请求生效」。 +- **用量展示分栏(完全切分必需,D13/§5.3/§6.1)**:`my-usage`、用户/Key 额度卡、`cost-alert` 须按 `counted_in_*_global` 拆「**计入全局额** / **模型组单算**」两栏(两者之和 = 总消费)。否则会出现「总花费 ≠ 全局额度判定值」的困惑。全局额度卡的「已用」只取「计入全局额」部分。 +- **语义提示**:明示「命中本限额后,该轴 [用户管理] 全局成本限额**不再生效、且该消费不计入全局额**(完全切分);RPM/并发仍生效」「用户组限额为人均上限」「多源取最大值」。 +- 5 语言 i18n;过 `i18n:audit-placeholders:fail`、`i18n:audit-messages-no-emoji:fail`。 + +--- + +## 11. 测试(≥80%) + +| 层 | 关键断言 | +|---|---| +| Resolver | 目标唯一组解析;userSide 多源取 max;提额仅个人用户、按档叠加(多条求和)参与 max;keySide 独立;模型无组→enforced 空 | +| 提额 F1(虚拟 source)| 无个人行、仅用户组限额时提额仍以 groupMax 为基线叠加生效;boost=0 时虚拟 source = groupMax(无回归);无任何 source 时提额惰性(userSide 仍 null,不凭空建限额)| +| 提额账本 | 多条有效期重叠叠加求和;`valid_period @> now` 内外切换;过期 DELETE 后不再生效;删行=撤销即时生效 | +| 提额 F2(缓存)| 时间窗到点 in-memory 精确生效/失效(零延迟);增删授予写时失效后即时生效;快照内过期行因 `@> now` 不误生效 | +| Service | key-mg / user-mg 桶 lease key 与 DB 聚合口径;五档越界 `MODEL_*`;fail-open | +| Guard | userSide 命中→`bypassUserGlobalCost`;keySide 命中→`bypassKeyGlobalCost`;RPM/并发恒检查;flag 关闭直通;**fail-open 不置 bypass** | +| 完全切分打标 | 落账两列 = `!bypass*`;命中轴消费不计入该轴全局聚合、不扣该轴全局 lease;非对称(仅 userSide)→ 不计 User 全局但仍计 Key 全局;标记默认 true | +| 全局聚合过滤 | `countedInGlobalOnly` 过滤生效;展示分栏两值之和 = 总消费;改分组不追溯历史行(写入冻结) | +| 主线回归 | 两标记 false / 标记恒 true 时与 main 逐字节一致 | +| Repository | 模型成员唯一约束(互斥);各表 CRUD;tstzrange 读写;过期 DELETE | +| 集成 | 真实 Redis+PG:人均口径计量、提额期内/外切换与叠加、完全切分(命中轴消费不入全局、未配置轴照常入全局)、lease 重播种与回填跳过口径一致、RPM 仍拦截、回填多桶扣减 | + +--- + +## 12. 
阶段 + +- **A** Schema + Repository(五表/两 enum、成员互斥、提额账本、聚合查询、过期 DELETE)+ 单测。 +- **B** Resolver + Service(§4/§6/§7 含提额叠加)+ 单测。 +- **C** Guard 覆盖(`insertBefore`、按轴标记、主线成本档守卫、多桶回填)+ 回归/集成。 +- **D** Admin API(四组端点 + OpenAPI)。 +- **E** Dashboard UI(三页面 + 提额授予列表)+ i18n。 +- **F** 文档(`docs/api`)+ CHANGELOG;PR → `dev`,squash-merge。 + +--- + +## 13. 风险 + +| 风险 | 缓解 | +|---|---| +| 主线成本档按轴拆分守卫引回归 | flag 默认 off、bypass 标记默认 false、`counted_in_*_global` 默认 true;写「flag 关闭逐字节一致」回归单测 | +| 模型组互斥被绕过(同模型多组) | `model_group_members.model` 唯一索引 + 应用层校验 | +| 提额 tstzrange 兼容性 | drizzle 不支持则降级 `validFrom/validTo` 两列 | +| max 合并语义被误解为「全组共享预算」 | 文档/UI 明示 D5「人均上限」;测试固化按 user 自身计量 | +| 提额过期残留 | 解析按 `valid_period @> now` 实时判定,过期自动失效;定时 DELETE 保活跃集小 | +| 与 main 同步冲突 | 主线 diff 仅 `guard-pipeline`/`rate-limit-guard`/`session`/`response-handler`/`app.ts`/`schema.ts`/`statistics.ts`/`lease-service.ts`/`rate-limit/service.ts`;其余新文件 | +| **完全切分使全局聚合变模型感知** | 用 `usage_ledger` 按轴打标(§3.6),三处(DB 过滤/Redis 跳过/展示)同源标记;默认 true 保「flag off 与 main 一致」;已否决读取期 `NOT IN`(R1/R2,§16.1)| +| **展示口径分裂(总花费 ≠ 全局额度判定)** | `my-usage`/额度卡/`cost-alert` 按标记拆「计入全局/模型组单算」两栏(§10),两值之和 = 总消费 | +| **lease 重播种与回填口径漂移** | 播种过滤、回填跳过、展示三处由同一对 `counted_in_*_global` 标记驱动,行级冻结 → 无漂移 | +| **热路径 DB 往返放大 / PG QPS 打满** | 解析走进程内短 TTL 缓存快照(§4.1/§4.7),目标无组场景 0 新增 DB 往返;lease 检查 `Promise.all` 并行;§15 加 micro-benchmark 验收 | +| **fail-open × bypass 双重放行** | fail-open 的桶不置对应 bypass 标记,保留主线全局额护栏(§5.2 CRITICAL);回归测试固化 | + +--- + +## 14. 
主线最小 diff 估算 + +> 注意:本表统计**改动行数**;热路径**每请求新增 I/O** 是独立维度,见下方与 §4.7。评审以「行数 + I/O」两个维度共同拍板。 + +| 文件 | 行数 | 说明 | +|---|---|---| +| `schema.ts` | +115 / -40 | 新五表两 enum(含 `quota_boost_grants`),删旧两表;`usage_ledger` +2 标记列(§3.6)| +| `guard-pipeline.ts` | +8 | `insertBefore` | +| `rate-limit-guard.ts` | +14 | User-*/Key-* 成本档按轴守卫(RPM/并发不变)| +| `session.ts` | +10 | 两 bypass 标记 + resolvedModelLimits[] | +| `response-handler.ts` | **+8**(seam 化后) | 仅 2 处调用点:`resolveCountedFlags` 写两列 + decrement 数组条件 spread;其余逻辑在新文件 `backfill.ts`(§5.2.4,把最高频文件冲突面降到最小)| +| `statistics.ts` | +12 | 4 个全局聚合加 `countedInGlobalOnly` 过滤(§6.1,默认 false)| +| `lease-service.ts` / `rate-limit/service.ts` | +6 | `queryDbUsage` / `checkTotalCostLimit` DB 兜底传 `countedInGlobalOnly=true` | +| `instrumentation.ts` | +6 | `register()` 内启动提额过期清理定时器 | +| `_root/app.ts` | +3 | 挂三路由 | +| `messages/*/quota.json`×5 | +子树 | 不改主线其他 key | + +**每请求新增 I/O 估算**(达成 §4.7 缓存 + 并行约束后): + +| 场景 | 新增 DB 往返 | 新增 Redis 往返 | 备注 | +|---|---|---|---| +| flag off | 0 | 0 | 与 main 逐字节一致 | +| flag on、模型无组 | 0(快照命中) | 0 | 解析即返回 enforced=[] | +| flag on、单轴命中 | 0(快照命中) | ≤5(并行) | 主线对应轴成本档旁路 + 该轴全局回填跳过,净 Redis 大致持平 | +| flag on、双轴命中 | 0(快照命中) | ≤10(并行) | 主线 User-_/Key-_ 成本档均旁路 + 两轴全局回填均跳过 | + +> 若**不做**缓存(每请求实查 `model_group_members` 等),无组场景即 +1 DB、命中场景 +4~6 DB **串行**横在转发前——故 §4.7 缓存为 v1 硬约束,非优化项。完全切分的两标记列写入随落账 UPDATE 完成,无新增往返。 + +`ENABLE_MODEL_RATE_LIMIT=false`(或标记恒 true)时零行为变化。 + +> **可上游化的通用 seam(hybrid,落地计划 §4.1)**:`insertBefore`、`statistics` 的 `countedInGlobalOnly?`、`lease.ts` 的 `minSliceUsd?`、`session` 的按轴 bypass 字段——均以**通用命名、无产品观点**实现,以便落地后作为小 PR 贡献回 upstream,把永久冲突面收敛到近零。 + +--- + +## 15. 
验收 + +- [ ] flag 关闭:与 main 逐字节一致。 +- [ ] 模型无组 / 某轴无配置:该轴走主线全局额(D9)。 +- [ ] 用户侧多源取最大值;提额仅个人用户可配、按指定档叠加(多条求和)且仅在有效期内;用户组/Key 无提额。 +- [ ] 提额账本无审批流(管理员直增删);过期由定时任务 DELETE,删行即时撤销;过期后不再参与计算。 +- [ ] 用户组为人均上限(按 user 自身消费计量)。 +- [ ] Key 侧独立 AND;命中按轴切分对应主线全局成本档(检查跳过);RPM/并发仍生效。 +- [ ] **完全切分(D13)**:命中轴消费**不计入**该轴主线全局额(不进 lease 播种、不扣全局 lease、不增 total/daily 计数);落账 `counted_in_*_global = !bypass*`。 +- [ ] **非对称切分**:仅配 userSide 时,消费不计入 User 全局但**仍计入** Key 全局。 +- [ ] **改分组不追溯**:模型加入/移出组只影响其后新行,历史消费归属不变(标记写入冻结)。 +- [ ] **展示分栏**:`my-usage`/额度卡/`cost-alert` 拆「计入全局 / 模型组单算」,两值之和 = 总消费;全局额度卡「已用」只取计入全局部分。 +- [ ] 五档越界返回正确 `MODEL_*` 码 + i18n(含模型组名 / 数值占位符)。 +- [ ] 模型成员全局互斥(DB 约束生效)。 +- [ ] **fail-open 不置 bypass**:模拟 Redis 故障,模型档 fail-open 时主线对应轴全局成本档仍执行(不双重放行)、且该消费仍计入全局(标记 true)——回归测试断言(§5.2 CRITICAL)。 +- [ ] **热路径缓存生效**:解析走进程内快照,写操作触发失效;模型无组场景每请求 0 新增 DB 往返。 +- [ ] **micro-benchmark**:flag on 且零配置时,p50/p99 转发前延迟相对 main 增量 < 约定阈值;lease 检查并行(非串行)。 +- [ ] **OPT-A(total 缓存)**:配置 total 模型限额时稳态每请求 0 次 total DB 聚合(命中 `total_cost:model:*`);未命中才查 DB 并异步写回;模型桶聚合**不**按 `counted_in_*_global` 过滤。 +- [ ] **OPT-B(小额度桶)**:设 `quotaModelLeaseMinSliceUsd` / 模型 percent 后小额度桶刷新频次下降;floor 被 remaining 收口、不超发;全 null 时与未优化逐字节一致。 +- [ ] **OPT-C(stale-while-revalidate)**:快照过期 / 失效后除进程首启外无请求阻塞于 DB 重建;并发刷新去重;失效经 pub/sub 广播、跨 Pod 一致。 +- [ ] **OPT-F(零组短路)**:全系统无模型组时 guard 在 per-model 查找前返回,flag on 零配置每请求 0 DB / 0 Redis。 +- [ ] 覆盖率 ≥80%;OpenAPI lint、i18n audit、typecheck、build 全绿。 + +--- + +## 16. 
开放点 + +- **O1 提额申请流(已定稿)**:**当前系统不实现**用户自助申请/审批工作流(D14)。提额为管理员直接在 `quota_boost_grants` 增删授予行(无 pending/审批状态机)。自助申请→审批若未来需要,可在账本上叠加 `status` 列扩展,不影响现有结构。 +- **O5 切分口径(已定稿,v1 采纳完全切分)**:v1 采「**配置轴完全切分**」(D13/§5.3)——命中某轴模型限额时,该请求消费既跳过该轴全局检查、也**不计入**该轴主线全局额。实现为 `usage_ledger` **按轴打标**(`counted_in_user_global` / `counted_in_key_global`,写入期由 bypass 标记冻结),全局聚合按标记过滤(§3.6/§5.3/§6.1)。已否决初版「仅跳过检查、消费仍计入」(模型限额额度需全局额 ≥ 模型消费才用得满、分组消费污染未分组流量)与读取期 `NOT IN` 排除(追溯重分类 + 口径分裂,见 §16.1)。选型对比与被否决方案见 §16.1。 +- **O2 模型组匹配**:v1 精确模型名;前缀/通配模式放 v2。 +- **O3 RPM/并发模型维度**:v1 不纳入。 +- **O4 旁路粒度(已定稿)**:采「**按轴旁路**」(§5.1,双标记 `bypassUserGlobalCost` / `bypassKeyGlobalCost`)。理由:唯一同时满足 D3(Key 独立 AND)、D8(命中即覆盖)、D9(某轴未配置则回退 [用户管理])的方案——非对称情形下,未配置模型限额的那一轴**保留主线全局额护栏**,不会失去成本约束。已否决「任一轴命中即同时旁路两级」的单标记方案(会在非对称情形违背 D9,使未配置轴变为无成本上限)。 + +### 16.1 完全切分选型:按轴打标(已采纳)vs 读取期 `NOT IN`(已否决) + +> 结论:完全切分把全局额聚合从「模型无关」改成「模型感知」,改动量约为初版「仅跳过检查」(主线约 +45 行)的 **3~5 倍**。**v1 采纳「按轴写入期打标」**(§3.6),它把改动从「读写多路模型感知 + 强一致排除集」收敛为「写入打标 + 读取布尔过滤」,并规避读取期 `NOT IN` 的两个结构性缺陷(R1/R2)。 + +**主线全局额的三条计费路径(改动靶点,均模型无关)** + +| 路径 | 位置 | 作用 | +|---|---|---| +| lease 播种(5h/daily/weekly/monthly) | `lease-service.ts` `queryDbUsage` → `sumUserCostInTimeRange`/`sumKeyCostInTimeRange`(`statistics.ts`) | Redis 切片初值 = limit − DB 用量 | +| total 档计数 | `service.ts` `checkTotalCostLimit` → Redis `total_cost:*`(5min TTL)+ DB 兜底 `sumUserTotalCost`/`sumKeyTotalCost` | 永久额 | +| 回填扣减 | `response-handler.ts` `trackCost` / `trackUserDailyCost` / 8 个 `decrementLeaseBudget` | 每请求无条件递增/递减全局桶 | + +> 关键事实:`sumUserCostInTimeRange` / `sumKeyCostInTimeRange` / `sumUserTotalCost` **同时被「限额检查」与「展示/告警」复用**(`actions/users.ts`、`actions/keys.ts`、`actions/my-usage.ts`、`actions/key-quota.ts`、`notification/tasks/cost-alert.ts`)。这是风险 R1 的根因。 + +**已采纳:按轴写入期打标的改动点** + +谓词不是「模型在不在组」,而是「**该轴当时是否真被旁路**」(= `!bypassAxis`,与 §5.2 的 fail-open 守卫自洽)。改动点: + +1. **schema**:`usage_ledger` +2 列 `counted_in_user_global` / `counted_in_key_global`(默认 true,§3.6)。 +2. 
**落账打标**:`updateRequestCostFromUsage` 写两列 = `!bypassUserGlobalCost` / `!bypassKeyGlobalCost`(近零额外开销,标记已在 session 上)。 +3. **全局聚合按标记过滤**:`sumUserCostInTimeRange`/`sumUserTotalCost`/`sumKeyCostInTimeRange`/`sumKeyTotalCost` 加可选 `countedInGlobalOnly` 参数(默认 false → 与 main 一致),仅限额检查调用方传 true(§6.1)。 +4. **回填按轴跳过**:被旁路轴跳过其全局 `decrementLeaseBudget`;并跳过 `trackCost` 中**该轴的 5h-fixed 计数器写入**(唯一从 Redis 计数器播种的全局档,见 §5.2.4 CRITICAL)。其余档(daily/weekly/monthly/total/5h-rolling)从已过滤 DB 播种,故 `trackCost`/`trackUserDailyCost` 其余写入无需跳过。 +5. **展示分栏**:`my-usage`、用户/Key 额度卡、`cost-alert` 按标记拆「计入全局 / 模型组单算」两栏(§10)。 + +> 三处(DB 过滤、Redis 回填跳过、展示)由**同一对标记**驱动 → 无漂移、无 `NOT IN`、无追溯。「flag off / 模型无组 / fail-open」标记恒 true → 与 main 逐字节一致。 + +**已否决:读取期 `NOT IN` 排除(保留为否决依据)** + +读取期对全局聚合加 `AND model NOT IN (:已分组模型集)` 看似更省(无新列),但因上述聚合函数被「检查」与「展示/告警」复用,且历史行被追溯重判,有两个结构性缺陷,故否决: + +| 等级 | 缺陷 | 说明 | 打标方案如何规避 | +|---|---|---|---| +| 高 | **R1 口径分裂** | 聚合函数被展示与执行复用,只改执行会出现「已用 \$100/上限 \$100 却不拦截」 | 标记同源,展示与执行用同一过滤,可一致拆栏 | +| 高 | **R2 追溯重分类** | `NOT IN` 对全部历史行生效:加入组的瞬间历史消费追溯退出全局桶(凭空多预算)、移出组则瞬间超限被拦 | 标记写入即冻结,改分组只影响未来行 | +| 中 | **R3 漂移窗口** | 排除集缓存与回填跳过短暂不一致时全局 lease 漂移至 re-seed 才自愈 | 标记冻结在行上,播种与扣减恒一致 | +| 低 | **R6 谓词歧义** | 「属任意组」还是「属有限额的组」?排除集随轴/用户而变 | 谓词即「该轴是否被旁路」,天然按轴、无歧义 | + +**两方案均需注意 R4(provider 不切分)**:O5 只涉 user/key 全局额,provider 桶仍按全模型聚合,须在文档/UI 明示。 + +--- + +## 17. 
性能优化补充设计(OPT-A..F) + +> **规模基线:目标部署用户量 ≤ 1 万。** 下列设计据此取舍——OPT-E 明确**不做**懒加载拆分(避免过度设计);其余以「消除 p99 尖刺、保持稳态亚毫秒」为目标。本节为 §2/§4.7/§6/§14/§15 的修订与补充,落地以本节为准。 +> +> **落地后延迟基线(达成本节优化后)**:flag off / 模型无组 ≈ **0**;稳态单/双轴命中 ≈ **一次并行 Redis 往返(~0.3–1ms)**,且命中轴会跳过主线该轴串行成本档、净开销往往更小;偶发的 lease 刷新 tick ≈ 该 1 个请求 +1–5ms。相对一次数百 ms–数秒的上游 LLM 调用,稳态新增在端到端 p50 中占比 < 0.5%。 + +### 17.1 OPT-A:模型 total 档复刻主线 Redis 读穿缓存 + +**问题**:`total` 档无 lease 窗口;若每请求直连 DB 做 `model IN (members)` 近全 history 聚合,则任何配置了 total 模型限额的请求都在热路径上多一次随历史增长的聚合(现有 `ModelRateLimitService` 即此问题)。 + +**设计**(对齐主线 `checkTotalCostLimit`,`service.ts:456-481`): +- 缓存键 `total_cost:model:{scope}:{scopeId}:{groupId}[:{resetAtMs}]`,TTL `300s`。 +- 读穿:命中→用缓存值;未命中→`sumScopeCostByModelsInTimeRange(scope, scopeId, members, start, now)`,**异步**写回缓存(不阻塞请求),fail-open。 +- **与完全切分的关系(关键)**:模型桶聚合**不**按 `counted_in_*_global` 过滤——模型桶是「模型限额自己的预算」,统计该 scope 在组成员模型上的**全部**消费,与全局额是否旁路无关。`counted_in_*_global` 仅作用于**主线全局额**聚合(§6.1),二者预算独立。 +- **限额变更无需失效**:缓存的是**用量**(与 limit 无关);比较时用快照里最新的 `limitTotalUsd`。同主线,over-grant 上限 = TTL(5min),可接受。 + +### 17.2 OPT-B:模型桶 lease 切片下限 / 独立 percent + +**问题**:lease 切片 = `limit × percent`(默认 5%)。模型组限额常远小于全局额(如 \$5/日 → 切片 \$0.25),几个请求即耗尽 → 频繁 `refreshCostLeaseFromDb` → 热路径 DB,lease 的「摊薄 DB」红利在小桶上失效。 + +**设计**: +- 新增系统设置(`system_config` 表 + `SystemSettings` 类型 + cache 默认值;**全部 nullable、默认 null → 行为与今一致、零回归**): + - `quotaModelLeasePercent5h / Daily / Weekly / Monthly`:模型维度专用百分比,null 时回退全局 `quotaLeasePercent*`。 + - `quotaModelLeaseMinSliceUsd`:模型桶切片下限(floor),null 时无下限。 +- `calculateLeaseSlice`(`lib/rate-limit/lease.ts` 纯函数)**加可选参数** `minSliceUsd?`(默认 undefined → 主线调用方不受影响): + ``` + remaining = max(0, limitAmount - currentUsage) + base = limitAmount * percent + withFloor = minSliceUsd ? max(base, minSliceUsd) : base + withCap = capUsd ? 
min(withFloor, capUsd) : withFloor + slice = min(withCap, remaining) // 恒被 remaining 收口 → 不会过授 + ``` + 下限被 `remaining` 收口,故小额度桶最坏 = 一次性租掉全部剩余(等价精确计数),不会超发。 +- 仅 `ModelLeaseService.refreshCostLeaseFromDb` 传入 floor 与模型 percent;主线 lease 不变。 + +### 17.3 OPT-C:解析快照 stale-while-revalidate + +**问题**:`provider-cache` 式缓存在过期/失效时让**触发请求阻塞**等 DB 重建(约 5 条查询)——每 TTL 边界 / 每次写失效后那 1 个请求 +5~20ms。 + +**设计**(在 §4.7 的 L1 + Redis pub/sub 基础上,照 `provider-cache.ts`): +- `getModelLimitSnapshot`: + ``` + if (cache.data) { + if (fresh) return cache.data; + triggerBackgroundRefresh(); // 不 await + return cache.data; // 立即返回旧快照 + } + return await triggerBackgroundRefresh(); // 仅真冷启动阻塞 + ``` + `triggerBackgroundRefresh` 以 `refreshPromise` + `version` 去重(防并发刷新 / 防旧刷新覆盖新失效)。 +- **失效(pub/sub 收到)改为「标记过期 + 触发后台刷新」而非置 null**:写操作后各 Pod 继续服务上一版快照、后台重建,**无任何请求阻塞**;新增配置最多「上一刷新周期」内不生效(与已接受的「集群 ≤TTL 传播」一致)。仅进程首启(无任何旧数据)那一个请求 await。 +- TTL 仍保留作兜底(pub/sub fire-and-forget、可能丢消息)。 +- **写路径同步刷新(已定,Option 2)**:`publishModelLimitCacheInvalidation()` 在广播前**先 `await` 一次本地重建**(绑定模块级默认 fetcher `configureModelLimitCache(buildModelLimitSnapshot)`),使**写请求落到的那个 Pod 写后即时新鲜**(read-your-writes、测试确定);其余 Pod 收 pub/sub 后走 markStale + serve-stale。 + - **影响范围**:同步刷新只在**管理员写配置路径**(非代理热路径),终端用户代理请求延迟零影响;admin Save 多付一次重建(几 ms,可忽略)。 + - **边界澄清**:read-your-writes 仅对「写请求所在 Pod」成立;多 Pod 集群的集群级新鲜度由 **pub/sub 传播**决定(亚秒级,兜底 ≤TTL),与本选项无关。配置短暂陈旧只影响「用哪个 limit 值」,预算计数是 Redis 原子 + DB 权威,不会双花。 + +### 17.4 OPT-D:`counted_in_*_global` 部分索引(基准门控,默认不加) + +- 完全切分后,重度使用模型组的用户其 ledger 多数行 `counted=false`;主线全局聚合 `AND counted_in_*_global=true` 在 `(userId/key, createdAt)` 上做残余过滤、扫并丢弃 false 行。**仅影响全局 lease 刷新(每 ~10s),非每请求**,影响有界。 +- v1 **默认不加索引**(同原方案)。§15 micro-benchmark 增加「重度模型组用户的全局 lease 刷新」用例;**仅当**实测热点时再加: + ```sql + CREATE INDEX CONCURRENTLY idx_usage_ledger_user_counted ON usage_ledger (user_id, created_at) WHERE counted_in_user_global; + CREATE INDEX CONCURRENTLY idx_usage_ledger_key_counted ON usage_ledger (key, created_at) WHERE 
counted_in_key_global; + ``` +- ≤1 万用户量级下大概率无需;列为「观测后决策」。 + +### 17.5 OPT-E:规模假设 ≤1 万用户 → 单一全局快照,不做懒加载拆分 + +- 最坏即「每个用户都配个人模型限额」≈ ≤1 万行 `model_group_limits(subject=user)` + 少量组/用户组/Key 行 + 小提额集;进程内 Map 约数 MB,重建 ≈ 5 条索引查询、几 ms。**结论:v1 用单一全局进程内快照即可,明确不实现「全局快照 + 按 user 懒加载」拆分**(避免过度设计)。 +- 运营建议:优先用**用户组限额**(行数小)而非逐用户个人行。 +- 未来触发条件(仅记录,v1 不做):个人限额行数量级达 1e5+,或快照重建 p99 超阈值 → 再评估拆分。 + +### 17.6 OPT-F:零组系统的最早短路 + +- guard / resolver 入口:`if (snapshot.modelToGroup.size === 0) return null`(全系统无任何模型组时,连 per-model 查找都跳过)。保证「flag on 但功能实际未启用」严格零开销。 + +### 17.7 受影响的配置 / Schema / 章节同步 + +- **`system_settings`(主线表变更)**:新增 5 列(OPT-B:`quota_model_lease_percent_5h/daily/weekly/monthly`、`quota_model_lease_min_slice_usd`),均 nullable、默认 null。随 §3.7 迁移一并 `db:generate`;同步更新 `SystemSettings` 类型与 `system-settings-cache.ts` 的 DEFAULT / fallback。 +- **§6** total 行已按 OPT-A 修正;**§4.7** 缓存参考已改为 `provider-cache.ts`(L1 + pub/sub)。**§8** `cache.ts` / `service.ts` 职责已补 OPT-A/C/F。 +- **§14 主线 diff 增量**:`system_settings` +5 列;`lease.ts` 加 `minSliceUsd?` 参数(+~6 行,默认 null 时零行为变化);`cache.ts` subscribe/publish 接线均在热路径之外,「flag off / 模型无组 0 RTT」基线不变。 +- **§15** 已新增 OPT-A/B/C/F 验收项。 + +--- + +## 18. 测试场景目录(驱动 unit / integration / E2E) + +> 本节是 §4.6/§4.8/§11 的可执行化展开:每个场景给 `ID | 配置/前置 | 动作/序列 | 期望 | 覆盖`,ID 稳定可引用、直接映射到 §11 的测试层。约定层级:**unit**=resolver/纯函数/缓存(无 I/O 或 mock Redis);**integration**=真实 Redis+PG 的 service/guard/聚合;**E2E**=完整代理链路。 + +### 18.0 分析:OPT 引入 / 既有未枚举的新边界(立例理由) + +1. **OPT-A 反直觉**:模型 total 桶**统计被全局旁路的消费**(不按 `counted_in_*` 过滤)——易误写成漏算,单立 T-TA-3。 +2. **OPT-B 安全/回归**:`floor>remaining` 被 remaining 收口不超发(T-LF-2);`floor/percent` 全 null 与全局逐字节一致(T-LF-3)。 +3. **OPT-C 写进程也吃 stale**:`markStale` 保留旧数据 + serve-stale → 连发起写的 Pod 在后台刷新前也短暂返回旧配置。**已定(T-SC-6,Option 2)**:写路径对本 Pod 强制同步刷新(广播前先 `await` 一次本地重建,详见 §17.3),以消除该窗口。 +4. **逐档跨源 max**:不同窗口由不同 source 取胜(T-RS-4)。 +5. **null + 提额**:null 取胜、提额忽略(T-RS-6);提额落在无 source 的窗口→按档惰性(T-BO-5)。 +6. **counted_in 默认 true 四路径**:flag off / 无组 / fail-open / 历史行各自断言(T-PT-7 / T-FO-2)。 +7. 
**改分组不追溯**:写入冻结,需"先消费→后改组→归属不变"序列(T-PT-6)。 + +### 18.1 Resolver / 合并 / 提额解析(unit) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-RS-1 | opus∈g-opus | resolve(opus) | G=g-opus | §4.1/D7 | +| T-RS-2 | sonnet 不属任何组 | resolve(sonnet) | enforced=[],两轴回退主线 | D9 | +| T-RS-3 | 个人(user,g)日$10 + tag team-a(user_group,g)日$30 | resolve | userSide 日=max(10,30)=30,bucket 按 user 自身计量 | D4/D5 | +| T-RS-4 | 个人 daily$10/weekly$100;组 daily$30/weekly$50 | resolve | daily=30(组)、weekly=100(个人)——逐档跨源取胜 | D4 | +| T-RS-5 | 仅 key(key,g)日$5;user 无配置 | resolve | keySide=5(独立 AND);userSide=null | D3/D9 | +| T-RS-6 | 个人某档=null(无限),组该档$30 | resolve | 该档=null(无限取胜);若有提额则被忽略 | §4.4 | +| T-RS-7 | 个人 + 两个用户组均命中 | resolve | 逐档 max over 三源 | D4 | + +### 18.2 提额账本(unit + integration) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-BO-1 | 无个人行、仅组限额$30 + 提额 daily+$50 期内 | resolve | personalBase=groupMax$30→eff=80→cap=max(30,80)=80 | F1 | +| T-BO-2 | 同上 boost=0 | resolve | 虚拟 source=groupMax$30,与无提额一致(无回归) | F1 | +| T-BO-3 | 无任何 source + 误授提额 | resolve | userSide=null,提额惰性、不凭空建限额 | §4.4 | +| T-BO-4 | 两条重叠有效期 +$50/+$20 | resolve | 个人档 +70(多条求和) | D10 | +| T-BO-5 | 提额 window=weekly 但无任何 weekly source | resolve | weekly 档惰性 no-op | §4.4 | +| T-BO-6 | 提额 validFrom 在未来 | `now<validFrom` 时 resolve | 提额尚未生效、不计入上限;到点后即时生效 | D10/F2 | +| T-BO-7 | 提额已过期但过期 DELETE 尚未执行 | resolve | `validTo>now` 兜底不误生效 | §7.1 | +| T-BO-8 | 过期 DELETE 执行后 | resolve | 行消失、不参与计算 | D12 | +| T-BO-9 | 删授予行=撤销 | publish 后 resolve | 即时撤销(pub/sub) | D14 | +| T-BO-10 | 试图给 user_group / key 提额 | API/校验 | 拒绝(仅个人用户) | D11 | + +### 18.3 完全切分 / 按轴打标(integration,real PG+Redis) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-PT-1 | (user,g-opus)日$30;用户全局日$10 | opus×8@$3 后 sonnet$5 | opus$24 计模型桶、counted_user=false 不污染全局;全局仅$5→sonnet 放行 | §4.8-A | +| T-PT-2 | 仅(user,g);KeyK 全局日$8;无(key,g) | opus×3@$3 | counted_user=false、counted_key=true→第3条被 Key 全局$8 拦 | §4.8-B | +| T-PT-3 | userSide 命中(非 fail-open) | 落账 | counted_in_user_global=!bypassUser=false;counted_key=true | §5.2.4 | +| T-PT-4 | 命中后 | 全局聚合 vs 模型桶聚合 | 全局只统计 
counted=true;模型桶统计全部 | §6.1 | +| T-PT-5 | 命中后 | 展示分栏 | 计入全局 + 模型组单算 = 总消费;全局额度卡"已用"只取计入全局 | §5.3/§10 | +| T-PT-6 | 先消费→模型加入组→再查 | 历史归属 | 写入冻结、不追溯重分类 | §16.1-R2 | +| T-PT-7 | flag off / 无组 / 历史行 | 落账 | counted_in_*=true(默认) | §3.6 | + +### 18.4 OPT-A:模型 total 读穿缓存(integration) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-TA-1 | total 模型限额$100 | 同实体连发 N 请求 | 稳态每请求 0 次 total DB 聚合(命中 `total_cost:model:*`) | OPT-A | +| T-TA-2 | 缓存未命中 | 首请求 | 查 DB + 异步写回,不阻塞请求 | OPT-A | +| T-TA-3 | userSide 命中、消费被全局旁路 | 模型 total 聚合 | **仍计入**该消费(不按 counted_in 过滤) | OPT-A 关键 | +| T-TA-4 | 累计≥$100 | 下一请求 | MODEL_TOTAL_* 错误码 + i18n | §11 | +| T-TA-5 | 缓存 TTL(300s) 过期 | 过期后请求 | 重新查 DB 并写回 | OPT-A | + +### 18.5 OPT-B:lease 切片 floor / 模型 percent(unit + integration) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-LF-1 | limit=$5/日,floor=$1,percent=5% | calculateLeaseSlice | slice=max($0.25,$1)=$1(floor 生效) | OPT-B | +| T-LF-2 | remaining=$0.5 < floor$1 | calculateLeaseSlice | slice=min($1,$0.5)=$0.5(被 remaining 收口、不超发) | OPT-B 安全 | +| T-LF-3 | minSliceUsd=null、模型 percent=null | calculateLeaseSlice | =limit×全局 percent(与未优化逐字节一致) | OPT-B 回归 | +| T-LF-4 | 模型 percent=20%、全局5% | refresh | 用 20%(模型覆盖全局) | OPT-B | +| T-LF-5 | floor > quotaLeaseCapUsd | calculateLeaseSlice | cap 取胜(min) | OPT-B | +| T-LF-6 | 小桶 + floor vs 无 floor | 同序列请求计 refresh 次数 | 有 floor 时 refresh 次数显著下降 | OPT-B 效果 | + +### 18.6 OPT-C:快照缓存 stale-while-revalidate + pub/sub(unit + integration) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-SC-1 | warm | getModelLimitSnapshot | 即时返回、0 DB | OPT-C | +| T-SC-2 | stale(有旧数据) | read | 返回旧快照 + 触发后台刷新、请求不阻塞 | OPT-C | +| T-SC-3 | 冷(data=null) | read | await 重建(仅进程首次) | OPT-C | +| T-SC-4 | pub/sub 收到失效 | 之后 read | markStale 保留旧数据、serve stale 直到刷新落地 | OPT-C | +| T-SC-5 | 并发刷新 | 多请求同时触发 | refreshPromise 去重、version 防旧刷新覆盖新失效 | OPT-C | +| T-SC-6 | 写进程本地(Option 2:写路径同步刷新) | `await publishModelLimitCacheInvalidation()` 后同 Pod read | 
快照**已含**新写入行(read-your-writes、确定性断言,无时序 flaky) | OPT-C / §17.3 | +| T-SC-7 | Redis 不可用 | subscribe 失败 | 降级到 TTL、不抛异常 | OPT-C | + +### 18.7 Guard / pipeline / fail-open / 互斥(unit + integration) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-GD-1 | flag off | 全链路 | 与 main 逐字节一致(无 model guard) | §13/§15 | +| T-GD-2 | flag on | pipeline build | modelRateLimit 在 rateLimit 之前(insertBefore) | §5.2.1 | +| T-GD-3 | userSide 命中 | guard | bypassUserGlobalCost=true、主线 User 成本档跳过 | §5.2 | +| T-GD-4 | keySide 命中 | guard | bypassKeyGlobalCost=true | §5.2 | +| T-GD-5 | 命中任意轴 | guard | RPM/并发仍检查(不旁路) | D8 | +| T-FO-1 | Redis 故障 | 模型档 fail-open | **不置 bypass**、主线全局档仍执行(不双重放行) | §5.2 CRITICAL | +| T-FO-2 | fail-open | 落账 | counted_in_*=true(仍计入全局) | §4.8-F | +| T-FO-3 | 一档 fail-open、其余通过 | checkBucket | failOpen=true→不置 bypass | §5.2 | +| T-OF-1 | 全系统 0 组 | guard | per-model 查找前短路、0 DB/0 Redis | OPT-F | +| T-MX-1 | 加已属他组的模型 | repo/API | 唯一约束/校验报错 | D6 | +| T-SM-1 | 合并护栏(落地后常驻 CI) | 断言 seam 锚点存在 | `registerExtensionStep`/`insertBefore`/4 个 sum 签名/`checkTotalCostLimit`/`calculateLeaseSlice(minSliceUsd?)` 仍在;upstream 改名即红 | 合并计划 §6 | + +### 18.8 E2E 端到端旅程(real proxy + Redis + PG) + +| ID | 配置/前置 | 动作/序列 | 期望 | 覆盖 | +|---|---|---|---|---| +| T-E2E-1 | flag off | 正常代理一轮 | 行为/计量与 main 一致 | 回归 | +| T-E2E-2 | (user,g)日$30 | opus 连发至耗尽 | 第 N 条 MODEL_DAILY_*;全局额未被消耗(切分) | D13 | +| T-E2E-3 | 非对称(仅 userSide,Key 全局$8) | opus 连发 | Key 全局先拦 | §4.8-B | +| T-E2E-4 | (user,g)日$30 + 提额+$50 期内 | opus 连发;跨越有效期 | 期内上限$80、期外回$30 | D10/F2 | +| T-E2E-5 | team-a(user_group,g)日$30,U5/U7 | 两用户各发 | 各自独立$30(人均,非共享) | D5 | +| T-E2E-6 | Redis 故障注入 | opus 请求 | 模型档 fail-open + 主线全局档兜底拦截 | §5.2 | +| T-E2E-7 | (user,g)日$30 + User RPM=5 | 6 连发 | 第 6 条 RPM 拦(模型限额不旁路 RPM) | D8 | +| T-E2E-8 | total 模型$100 | 连发跨越$100 | total 缓存命中稳态、越界 MODEL_TOTAL | OPT-A | +| T-E2E-9 | **降级为 integration(已定,Option B)**:单进程 + 真 Redis pub/sub | publish 失效 → 之后 read | 订阅者 markStale 触发、下一次重建反映 DB 变更 | P-1 / OPT-C | + +> 
**决策记录**: +> - **T-SC-6 → Option 2(写路径同步刷新)**:写 Pod 即时新鲜 + 测试确定;只在 admin 写路径,终端用户代理请求零影响(详见 §17.3)。 +> - **T-E2E-9 → Option B(integration,单进程 + 真 Redis pub/sub)**:覆盖订阅接线/channel/失效→重建;pub/sub 机制已由 `provider-cache.ts` / `circuit-breaker.ts` 现网验证,真·双实例 E2E 后置/不做。 diff --git a/drizzle/0105_broad_crystal.sql b/drizzle/0105_broad_crystal.sql new file mode 100644 index 000000000..dcba93c75 --- /dev/null +++ b/drizzle/0105_broad_crystal.sql @@ -0,0 +1,223 @@ +DO $$ BEGIN + CREATE TYPE "public"."boost_window" AS ENUM('5h', 'daily', 'weekly', 'monthly', 'total'); +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +DO $$ BEGIN + CREATE TYPE "public"."limit_subject" AS ENUM('user', 'key', 'user_group'); +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "model_group_limits" ( + "id" serial PRIMARY KEY NOT NULL, + "subject_type" "limit_subject" NOT NULL, + "subject_id" integer NOT NULL, + "model_group_id" integer NOT NULL, + "rpm_limit" integer, + "limit_5h_usd" numeric(10, 2), + "limit_5h_reset_mode" "daily_reset_mode" DEFAULT 'fixed' NOT NULL, + "daily_limit_usd" numeric(10, 2), + "limit_weekly_usd" numeric(10, 2), + "limit_monthly_usd" numeric(10, 2), + "limit_total_usd" numeric(10, 2), + "limit_5h_cost_reset_at" timestamp with time zone, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "model_group_members" ( + "id" serial PRIMARY KEY NOT NULL, + "model_group_id" integer NOT NULL, + "model" varchar(128) NOT NULL, + "created_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "model_groups" ( + "id" serial PRIMARY KEY NOT NULL, + "name" varchar(128) NOT NULL, + "description" text, + "is_singleton" boolean DEFAULT false NOT NULL, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + 
"updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "quota_boost_grants" ( + "id" serial PRIMARY KEY NOT NULL, + "user_id" integer NOT NULL, + "model_group_id" integer NOT NULL, + "window" "boost_window" NOT NULL, + "amount_usd" numeric(10, 2) NOT NULL, + "valid_from" timestamp with time zone NOT NULL, + "valid_to" timestamp with time zone NOT NULL, + "note" text, + "created_by" integer, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +CREATE TABLE IF NOT EXISTS "user_groups" ( + "id" serial PRIMARY KEY NOT NULL, + "tag" varchar(255) NOT NULL, + "name" varchar(128), + "description" text, + "created_at" timestamp with time zone DEFAULT now() NOT NULL, + "updated_at" timestamp with time zone DEFAULT now() NOT NULL +); +--> statement-breakpoint +ALTER TABLE "message_request" ADD COLUMN IF NOT EXISTS "counted_in_user_global" boolean DEFAULT true NOT NULL;--> statement-breakpoint +ALTER TABLE "message_request" ADD COLUMN IF NOT EXISTS "counted_in_key_global" boolean DEFAULT true NOT NULL;--> statement-breakpoint +ALTER TABLE "system_settings" ADD COLUMN IF NOT EXISTS "quota_model_lease_percent_5h" numeric(5, 4);--> statement-breakpoint +ALTER TABLE "system_settings" ADD COLUMN IF NOT EXISTS "quota_model_lease_percent_daily" numeric(5, 4);--> statement-breakpoint +ALTER TABLE "system_settings" ADD COLUMN IF NOT EXISTS "quota_model_lease_percent_weekly" numeric(5, 4);--> statement-breakpoint +ALTER TABLE "system_settings" ADD COLUMN IF NOT EXISTS "quota_model_lease_percent_monthly" numeric(5, 4);--> statement-breakpoint +ALTER TABLE "system_settings" ADD COLUMN IF NOT EXISTS "quota_model_lease_min_slice_usd" numeric(10, 2);--> statement-breakpoint +ALTER TABLE "usage_ledger" ADD COLUMN IF NOT EXISTS "counted_in_user_global" boolean DEFAULT true NOT NULL;--> statement-breakpoint +ALTER TABLE 
"usage_ledger" ADD COLUMN IF NOT EXISTS "counted_in_key_global" boolean DEFAULT true NOT NULL;--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "model_group_limits" ADD CONSTRAINT "model_group_limits_model_group_id_model_groups_id_fk" FOREIGN KEY ("model_group_id") REFERENCES "public"."model_groups"("id") ON DELETE cascade ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "model_group_members" ADD CONSTRAINT "model_group_members_model_group_id_model_groups_id_fk" FOREIGN KEY ("model_group_id") REFERENCES "public"."model_groups"("id") ON DELETE cascade ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "quota_boost_grants" ADD CONSTRAINT "quota_boost_grants_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +DO $$ BEGIN + ALTER TABLE "quota_boost_grants" ADD CONSTRAINT "quota_boost_grants_model_group_id_model_groups_id_fk" FOREIGN KEY ("model_group_id") REFERENCES "public"."model_groups"("id") ON DELETE cascade ON UPDATE no action; +EXCEPTION + WHEN duplicate_object THEN null; +END $$;--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "model_group_limits_uniq_idx" ON "model_group_limits" USING btree ("subject_type","subject_id","model_group_id");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "model_group_limits_subject_idx" ON "model_group_limits" USING btree ("subject_type","subject_id");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "model_group_limits_group_idx" ON "model_group_limits" USING btree ("model_group_id");--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "model_group_members_model_idx" ON "model_group_members" USING btree ("model");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "model_group_members_group_idx" ON 
"model_group_members" USING btree ("model_group_id");--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "model_groups_name_idx" ON "model_groups" USING btree ("name");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "quota_boost_grants_target_idx" ON "quota_boost_grants" USING btree ("user_id","model_group_id","window");--> statement-breakpoint +CREATE INDEX IF NOT EXISTS "quota_boost_grants_valid_to_idx" ON "quota_boost_grants" USING btree ("valid_to");--> statement-breakpoint +CREATE UNIQUE INDEX IF NOT EXISTS "user_groups_tag_idx" ON "user_groups" USING btree ("tag");--> statement-breakpoint +-- Update fn_upsert_usage_ledger so the new message_request.counted_in_*_global +-- markers are copied into usage_ledger. Without this the installed trigger (from +-- an earlier migration) keeps writing the defaults (true) on the ledger row, so a +-- model-split request marked counted=false on message_request would still be +-- counted globally in DB/lease aggregations. Mirror of src/lib/ledger-backfill/trigger.sql. +CREATE OR REPLACE FUNCTION fn_upsert_usage_ledger() +RETURNS TRIGGER AS $$ +DECLARE + v_final_provider_id integer; + v_is_success boolean; + v_success_rate_outcome varchar; +BEGIN + v_success_rate_outcome := fn_compute_message_request_success_rate_outcome( + NEW.blocked_by, + NEW.status_code, + NEW.error_message, + NEW.provider_chain + ); + + IF NEW.blocked_by = 'warmup' THEN + -- If a ledger row already exists (row was originally non-warmup), mark it as warmup + -- and sync the latest actual_response_model so audit stays consistent across tables. 
+ UPDATE usage_ledger + SET blocked_by = 'warmup', + success_rate_outcome = v_success_rate_outcome, + actual_response_model = NEW.actual_response_model + WHERE request_id = NEW.id; + RETURN NEW; + END IF; + + IF LOWER(REGEXP_REPLACE(COALESCE(NEW.endpoint, ''), '/+$', '')) + IN ('/v1/messages/count_tokens', '/v1/responses/compact') THEN + DELETE FROM usage_ledger WHERE request_id = NEW.id; + RETURN NEW; + END IF; + + IF NEW.provider_chain IS NOT NULL + AND jsonb_typeof(NEW.provider_chain) = 'array' + AND jsonb_array_length(NEW.provider_chain) > 0 + AND jsonb_typeof(NEW.provider_chain -> -1) = 'object' + AND (NEW.provider_chain -> -1 ? 'id') + AND (NEW.provider_chain -> -1 ->> 'id') ~ '^[0-9]+$' THEN + v_final_provider_id := (NEW.provider_chain -> -1 ->> 'id')::integer; + ELSE + v_final_provider_id := NEW.provider_id; + END IF; + + v_is_success := (NEW.error_message IS NULL OR NEW.error_message = '') + AND (NEW.status_code IS NULL OR NEW.status_code < 400); + + INSERT INTO usage_ledger ( + request_id, user_id, key, provider_id, final_provider_id, + model, original_model, actual_response_model, endpoint, api_type, session_id, + status_code, is_success, success_rate_outcome, blocked_by, + cost_usd, cost_multiplier, group_cost_multiplier, + input_tokens, output_tokens, + cache_creation_input_tokens, cache_read_input_tokens, + cache_creation_5m_input_tokens, cache_creation_1h_input_tokens, + cache_ttl_applied, context_1m_applied, swap_cache_ttl_applied, + duration_ms, ttfb_ms, client_ip, created_at, + counted_in_user_global, counted_in_key_global + ) VALUES ( + NEW.id, NEW.user_id, NEW.key, NEW.provider_id, v_final_provider_id, + NEW.model, NEW.original_model, NEW.actual_response_model, NEW.endpoint, NEW.api_type, NEW.session_id, + NEW.status_code, v_is_success, v_success_rate_outcome, NEW.blocked_by, + NEW.cost_usd, NEW.cost_multiplier, NEW.group_cost_multiplier, + NEW.input_tokens, NEW.output_tokens, + NEW.cache_creation_input_tokens, NEW.cache_read_input_tokens, + 
NEW.cache_creation_5m_input_tokens, NEW.cache_creation_1h_input_tokens, + NEW.cache_ttl_applied, NEW.context_1m_applied, NEW.swap_cache_ttl_applied, + NEW.duration_ms, NEW.ttfb_ms, NEW.client_ip, NEW.created_at, + COALESCE(NEW.counted_in_user_global, true), COALESCE(NEW.counted_in_key_global, true) + ) + ON CONFLICT (request_id) DO UPDATE SET + user_id = EXCLUDED.user_id, + key = EXCLUDED.key, + provider_id = EXCLUDED.provider_id, + final_provider_id = EXCLUDED.final_provider_id, + model = EXCLUDED.model, + original_model = EXCLUDED.original_model, + actual_response_model = EXCLUDED.actual_response_model, + endpoint = EXCLUDED.endpoint, + api_type = EXCLUDED.api_type, + session_id = EXCLUDED.session_id, + status_code = EXCLUDED.status_code, + is_success = EXCLUDED.is_success, + success_rate_outcome = EXCLUDED.success_rate_outcome, + blocked_by = EXCLUDED.blocked_by, + cost_usd = EXCLUDED.cost_usd, + cost_multiplier = EXCLUDED.cost_multiplier, + group_cost_multiplier = EXCLUDED.group_cost_multiplier, + input_tokens = EXCLUDED.input_tokens, + output_tokens = EXCLUDED.output_tokens, + cache_creation_input_tokens = EXCLUDED.cache_creation_input_tokens, + cache_read_input_tokens = EXCLUDED.cache_read_input_tokens, + cache_creation_5m_input_tokens = EXCLUDED.cache_creation_5m_input_tokens, + cache_creation_1h_input_tokens = EXCLUDED.cache_creation_1h_input_tokens, + cache_ttl_applied = EXCLUDED.cache_ttl_applied, + context_1m_applied = EXCLUDED.context_1m_applied, + swap_cache_ttl_applied = EXCLUDED.swap_cache_ttl_applied, + duration_ms = EXCLUDED.duration_ms, + ttfb_ms = EXCLUDED.ttfb_ms, + client_ip = EXCLUDED.client_ip, + counted_in_user_global = EXCLUDED.counted_in_user_global, + counted_in_key_global = EXCLUDED.counted_in_key_global; + -- created_at deliberately NOT updated on conflict: it represents the + -- original insert time of the ledger row, which is immutable by design. 
+ + RETURN NEW; +EXCEPTION WHEN OTHERS THEN + RAISE WARNING 'fn_upsert_usage_ledger failed for request_id=%: %', NEW.id, SQLERRM; + RETURN NEW; +END; +$$ LANGUAGE plpgsql;--> statement-breakpoint +DROP TRIGGER IF EXISTS trg_upsert_usage_ledger ON message_request;--> statement-breakpoint +CREATE TRIGGER trg_upsert_usage_ledger +AFTER INSERT OR UPDATE ON message_request +FOR EACH ROW +EXECUTE FUNCTION fn_upsert_usage_ledger(); \ No newline at end of file diff --git a/drizzle/meta/0105_snapshot.json b/drizzle/meta/0105_snapshot.json new file mode 100644 index 000000000..0cd44c104 --- /dev/null +++ b/drizzle/meta/0105_snapshot.json @@ -0,0 +1,5156 @@ +{ + "id": "411a6b3f-ffe2-467a-aa78-936c5924aefb", + "prevId": "e287317d-0fc7-4491-960d-b22636dc9471", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.audit_log": { + "name": "audit_log", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "action_category": { + "name": "action_category", + "type": "varchar(32)", + "primaryKey": false, + "notNull": true + }, + "action_type": { + "name": "action_type", + "type": "varchar(64)", + "primaryKey": false, + "notNull": true + }, + "target_type": { + "name": "target_type", + "type": "varchar(32)", + "primaryKey": false, + "notNull": false + }, + "target_id": { + "name": "target_id", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "target_name": { + "name": "target_name", + "type": "varchar(256)", + "primaryKey": false, + "notNull": false + }, + "before_value": { + "name": "before_value", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "after_value": { + "name": "after_value", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "operator_user_id": { + "name": "operator_user_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "operator_user_name": { + "name": "operator_user_name", + "type": "varchar(128)", + 
"primaryKey": false, + "notNull": false + }, + "operator_key_id": { + "name": "operator_key_id", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "operator_key_name": { + "name": "operator_key_name", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "operator_ip": { + "name": "operator_ip", + "type": "varchar(45)", + "primaryKey": false, + "notNull": false + }, + "user_agent": { + "name": "user_agent", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "success": { + "name": "success", + "type": "boolean", + "primaryKey": false, + "notNull": true + }, + "error_message": { + "name": "error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "idx_audit_log_category_created_at": { + "name": "idx_audit_log_category_created_at", + "columns": [ + { + "expression": "action_category", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_audit_log_operator_user_created_at": { + "name": "idx_audit_log_operator_user_created_at", + "columns": [ + { + "expression": "operator_user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"audit_log\".\"operator_user_id\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_audit_log_operator_ip_created_at": { + "name": "idx_audit_log_operator_ip_created_at", + "columns": [ + { + "expression": "operator_ip", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": 
"created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"audit_log\".\"operator_ip\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_audit_log_target": { + "name": "idx_audit_log_target", + "columns": [ + { + "expression": "target_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "target_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"audit_log\".\"target_type\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_audit_log_created_at_id": { + "name": "idx_audit_log_created_at_id", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.error_rules": { + "name": "error_rules", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "pattern": { + "name": "pattern", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "match_type": { + "name": "match_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'regex'" + }, + "category": { + "name": "category", + "type": "varchar(50)", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "override_response": { + "name": "override_response", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "override_status_code": { + "name": "override_status_code", + "type": "integer", + 
"primaryKey": false, + "notNull": false + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "is_default": { + "name": "is_default", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "idx_error_rules_enabled": { + "name": "idx_error_rules_enabled", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "unique_pattern": { + "name": "unique_pattern", + "columns": [ + { + "expression": "pattern", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_category": { + "name": "idx_category", + "columns": [ + { + "expression": "category", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_match_type": { + "name": "idx_match_type", + "columns": [ + { + "expression": "match_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + 
}, + "public.keys": { + "name": "keys", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "can_login_web_ui": { + "name": "can_login_web_ui", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "limit_5h_usd": { + "name": "limit_5h_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_reset_mode": { + "name": "limit_5h_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'rolling'" + }, + "limit_daily_usd": { + "name": "limit_daily_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "daily_reset_mode": { + "name": "daily_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'fixed'" + }, + "daily_reset_time": { + "name": "daily_reset_time", + "type": "varchar(5)", + "primaryKey": false, + "notNull": true, + "default": "'00:00'" + }, + "limit_weekly_usd": { + "name": "limit_weekly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_monthly_usd": { + "name": "limit_monthly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_total_usd": { + "name": "limit_total_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": 
false + }, + "cost_reset_at": { + "name": "cost_reset_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "limit_concurrent_sessions": { + "name": "limit_concurrent_sessions", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "provider_group": { + "name": "provider_group", + "type": "varchar(200)", + "primaryKey": false, + "notNull": false, + "default": "'default'" + }, + "cache_ttl_preference": { + "name": "cache_ttl_preference", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_keys_user_id": { + "name": "idx_keys_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_keys_key": { + "name": "idx_keys_key", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_keys_created_at": { + "name": "idx_keys_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_keys_deleted_at": { + "name": "idx_keys_deleted_at", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": 
false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.message_request": { + "name": "message_request", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "provider_id": { + "name": "provider_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cost_usd": { + "name": "cost_usd", + "type": "numeric(21, 15)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "cost_multiplier": { + "name": "cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": false + }, + "group_cost_multiplier": { + "name": "group_cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": false + }, + "cost_breakdown": { + "name": "cost_breakdown", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "session_id": { + "name": "session_id", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "request_sequence": { + "name": "request_sequence", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 1 + }, + "provider_chain": { + "name": "provider_chain", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "status_code": { + "name": "status_code", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "api_type": { + "name": "api_type", + "type": "varchar(20)", + "primaryKey": false, + 
"notNull": false + }, + "endpoint": { + "name": "endpoint", + "type": "varchar(256)", + "primaryKey": false, + "notNull": false + }, + "original_model": { + "name": "original_model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "actual_response_model": { + "name": "actual_response_model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "input_tokens": { + "name": "input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "ttfb_ms": { + "name": "ttfb_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_creation_5m_input_tokens": { + "name": "cache_creation_5m_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_creation_1h_input_tokens": { + "name": "cache_creation_1h_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_ttl_applied": { + "name": "cache_ttl_applied", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "context_1m_applied": { + "name": "context_1m_applied", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "swap_cache_ttl_applied": { + "name": "swap_cache_ttl_applied", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "special_settings": { + "name": "special_settings", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "hedge_losers": { + "name": "hedge_losers", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "error_message": { + "name": 
"error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "error_stack": { + "name": "error_stack", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "error_cause": { + "name": "error_cause", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "blocked_by": { + "name": "blocked_by", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "blocked_reason": { + "name": "blocked_reason", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "user_agent": { + "name": "user_agent", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "client_ip": { + "name": "client_ip", + "type": "varchar(45)", + "primaryKey": false, + "notNull": false + }, + "messages_count": { + "name": "messages_count", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "counted_in_user_global": { + "name": "counted_in_user_global", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "counted_in_key_global": { + "name": "counted_in_key_global", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_message_request_user_date_cost": { + "name": "idx_message_request_user_date_cost", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": 
"last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_user_created_at_cost_stats": { + "name": "idx_message_request_user_created_at_cost_stats", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_user_query": { + "name": "idx_message_request_user_query", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_provider_created_at_active": { + "name": "idx_message_request_provider_created_at_active", + "columns": [ + { + "expression": "provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_provider_created_at_finalized_active": { + "name": "idx_message_request_provider_created_at_finalized_active", + "columns": [ + { + "expression": 
"provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND \"message_request\".\"status_code\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_session_id": { + "name": "idx_message_request_session_id", + "columns": [ + { + "expression": "session_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_session_id_prefix": { + "name": "idx_message_request_session_id_prefix", + "columns": [ + { + "expression": "\"session_id\" varchar_pattern_ops", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_session_seq": { + "name": "idx_message_request_session_seq", + "columns": [ + { + "expression": "session_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "request_sequence", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_endpoint": { + "name": "idx_message_request_endpoint", + "columns": [ + { + "expression": "endpoint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + 
"idx_message_request_blocked_by": { + "name": "idx_message_request_blocked_by", + "columns": [ + { + "expression": "blocked_by", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_provider_id": { + "name": "idx_message_request_provider_id", + "columns": [ + { + "expression": "provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_user_id": { + "name": "idx_message_request_user_id", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key": { + "name": "idx_message_request_key", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key_created_at_id": { + "name": "idx_message_request_key_created_at_id", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key_model_active": { + "name": "idx_message_request_key_model_active", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + 
"isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND \"message_request\".\"model\" IS NOT NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key_endpoint_active": { + "name": "idx_message_request_key_endpoint_active", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "endpoint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND \"message_request\".\"endpoint\" IS NOT NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_created_at_id_active": { + "name": "idx_message_request_created_at_id_active", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_model_active": { + "name": "idx_message_request_model_active", + "columns": [ + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND \"message_request\".\"model\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_status_code_active": { + "name": "idx_message_request_status_code_active", + "columns": [ + { + "expression": "status_code", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" 
IS NULL AND \"message_request\".\"status_code\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_created_at": { + "name": "idx_message_request_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_deleted_at": { + "name": "idx_message_request_deleted_at", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key_last_active": { + "name": "idx_message_request_key_last_active", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_key_cost_active": { + "name": "idx_message_request_key_cost_active", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND (\"message_request\".\"blocked_by\" IS NULL OR \"message_request\".\"blocked_by\" <> 'warmup')", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_session_user_info": { + "name": "idx_message_request_session_user_info", + "columns": [ + { + "expression": "session_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": 
"created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_message_request_client_ip_created_at": { + "name": "idx_message_request_client_ip_created_at", + "columns": [ + { + "expression": "client_ip", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"message_request\".\"deleted_at\" IS NULL AND \"message_request\".\"client_ip\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.model_group_limits": { + "name": "model_group_limits", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "subject_type": { + "name": "subject_type", + "type": "limit_subject", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "subject_id": { + "name": "subject_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "model_group_id": { + "name": "model_group_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "rpm_limit": { + "name": "rpm_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "limit_5h_usd": { + "name": "limit_5h_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_reset_mode": { + "name": "limit_5h_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + 
"notNull": true, + "default": "'fixed'" + }, + "daily_limit_usd": { + "name": "daily_limit_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_weekly_usd": { + "name": "limit_weekly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_monthly_usd": { + "name": "limit_monthly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_total_usd": { + "name": "limit_total_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_cost_reset_at": { + "name": "limit_5h_cost_reset_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "model_group_limits_uniq_idx": { + "name": "model_group_limits_uniq_idx", + "columns": [ + { + "expression": "subject_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "subject_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model_group_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "model_group_limits_subject_idx": { + "name": "model_group_limits_subject_idx", + "columns": [ + { + "expression": "subject_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "subject_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "model_group_limits_group_idx": { + "name": "model_group_limits_group_idx", + "columns": [ + { + "expression": 
"model_group_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "model_group_limits_model_group_id_model_groups_id_fk": { + "name": "model_group_limits_model_group_id_model_groups_id_fk", + "tableFrom": "model_group_limits", + "tableTo": "model_groups", + "columnsFrom": [ + "model_group_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.model_group_members": { + "name": "model_group_members", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "model_group_id": { + "name": "model_group_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "model_group_members_model_idx": { + "name": "model_group_members_model_idx", + "columns": [ + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "model_group_members_group_idx": { + "name": "model_group_members_group_idx", + "columns": [ + { + "expression": "model_group_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "model_group_members_model_group_id_model_groups_id_fk": { + "name": "model_group_members_model_group_id_model_groups_id_fk", + "tableFrom": "model_group_members", + "tableTo": 
"model_groups", + "columnsFrom": [ + "model_group_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.model_groups": { + "name": "model_groups", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(128)", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_singleton": { + "name": "is_singleton", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "model_groups_name_idx": { + "name": "model_groups_name_idx", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.model_prices": { + "name": "model_prices", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "model_name": { + "name": "model_name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "price_data": { + "name": "price_data", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "varchar(20)", + "primaryKey": false, + 
"notNull": true, + "default": "'litellm'" + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "idx_model_prices_latest": { + "name": "idx_model_prices_latest", + "columns": [ + { + "expression": "model_name", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_model_prices_model_name": { + "name": "idx_model_prices_model_name", + "columns": [ + { + "expression": "model_name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_model_prices_created_at": { + "name": "idx_model_prices_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_model_prices_source": { + "name": "idx_model_prices_source", + "columns": [ + { + "expression": "source", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.notification_settings": { + "name": "notification_settings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "enabled": { + "name": "enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + 
"default": false + }, + "use_legacy_mode": { + "name": "use_legacy_mode", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "circuit_breaker_enabled": { + "name": "circuit_breaker_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "circuit_breaker_webhook": { + "name": "circuit_breaker_webhook", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "daily_leaderboard_enabled": { + "name": "daily_leaderboard_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "daily_leaderboard_webhook": { + "name": "daily_leaderboard_webhook", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "daily_leaderboard_time": { + "name": "daily_leaderboard_time", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false, + "default": "'09:00'" + }, + "daily_leaderboard_top_n": { + "name": "daily_leaderboard_top_n", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 5 + }, + "cost_alert_enabled": { + "name": "cost_alert_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "cost_alert_webhook": { + "name": "cost_alert_webhook", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "cost_alert_threshold": { + "name": "cost_alert_threshold", + "type": "numeric(5, 2)", + "primaryKey": false, + "notNull": false, + "default": "'0.80'" + }, + "cost_alert_check_interval": { + "name": "cost_alert_check_interval", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 60 + }, + "cache_hit_rate_alert_enabled": { + "name": "cache_hit_rate_alert_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "cache_hit_rate_alert_webhook": { + "name": "cache_hit_rate_alert_webhook", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + 
"cache_hit_rate_alert_window_mode": { + "name": "cache_hit_rate_alert_window_mode", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false, + "default": "'auto'" + }, + "cache_hit_rate_alert_check_interval": { + "name": "cache_hit_rate_alert_check_interval", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 5 + }, + "cache_hit_rate_alert_historical_lookback_days": { + "name": "cache_hit_rate_alert_historical_lookback_days", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 7 + }, + "cache_hit_rate_alert_min_eligible_requests": { + "name": "cache_hit_rate_alert_min_eligible_requests", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 20 + }, + "cache_hit_rate_alert_min_eligible_tokens": { + "name": "cache_hit_rate_alert_min_eligible_tokens", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "cache_hit_rate_alert_abs_min": { + "name": "cache_hit_rate_alert_abs_min", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.05'" + }, + "cache_hit_rate_alert_drop_rel": { + "name": "cache_hit_rate_alert_drop_rel", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.3'" + }, + "cache_hit_rate_alert_drop_abs": { + "name": "cache_hit_rate_alert_drop_abs", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.1'" + }, + "cache_hit_rate_alert_cooldown_minutes": { + "name": "cache_hit_rate_alert_cooldown_minutes", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 30 + }, + "cache_hit_rate_alert_top_n": { + "name": "cache_hit_rate_alert_top_n", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 10 + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": 
"timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.notification_target_bindings": { + "name": "notification_target_bindings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "notification_type": { + "name": "notification_type", + "type": "notification_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "target_id": { + "name": "target_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "schedule_cron": { + "name": "schedule_cron", + "type": "varchar(100)", + "primaryKey": false, + "notNull": false + }, + "schedule_timezone": { + "name": "schedule_timezone", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "template_override": { + "name": "template_override", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "unique_notification_target_binding": { + "name": "unique_notification_target_binding", + "columns": [ + { + "expression": "notification_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "target_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_notification_bindings_type": { + "name": "idx_notification_bindings_type", + "columns": [ + { + "expression": "notification_type", + "isExpression": false, + "asc": true, + "nulls": "last" + 
}, + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_notification_bindings_target": { + "name": "idx_notification_bindings_target", + "columns": [ + { + "expression": "target_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "notification_target_bindings_target_id_webhook_targets_id_fk": { + "name": "notification_target_bindings_target_id_webhook_targets_id_fk", + "tableFrom": "notification_target_bindings", + "tableTo": "webhook_targets", + "columnsFrom": [ + "target_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.provider_endpoint_probe_logs": { + "name": "provider_endpoint_probe_logs", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "endpoint_id": { + "name": "endpoint_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "source": { + "name": "source", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'scheduled'" + }, + "ok": { + "name": "ok", + "type": "boolean", + "primaryKey": false, + "notNull": true + }, + "status_code": { + "name": "status_code", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "latency_ms": { + "name": "latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "error_type": { + "name": "error_type", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "error_message": { + "name": 
"error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "idx_provider_endpoint_probe_logs_endpoint_created_at": { + "name": "idx_provider_endpoint_probe_logs_endpoint_created_at", + "columns": [ + { + "expression": "endpoint_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoint_probe_logs_created_at": { + "name": "idx_provider_endpoint_probe_logs_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "provider_endpoint_probe_logs_endpoint_id_provider_endpoints_id_fk": { + "name": "provider_endpoint_probe_logs_endpoint_id_provider_endpoints_id_fk", + "tableFrom": "provider_endpoint_probe_logs", + "tableTo": "provider_endpoints", + "columnsFrom": [ + "endpoint_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.provider_endpoints": { + "name": "provider_endpoints", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "vendor_id": { + "name": "vendor_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "provider_type": { + "name": "provider_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'claude'" + }, + "url": { + "name": "url", + "type": "text", + 
"primaryKey": false, + "notNull": true + }, + "label": { + "name": "label", + "type": "varchar(200)", + "primaryKey": false, + "notNull": false + }, + "sort_order": { + "name": "sort_order", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "last_probed_at": { + "name": "last_probed_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_probe_ok": { + "name": "last_probe_ok", + "type": "boolean", + "primaryKey": false, + "notNull": false + }, + "last_probe_status_code": { + "name": "last_probe_status_code", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "last_probe_latency_ms": { + "name": "last_probe_latency_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "last_probe_error_type": { + "name": "last_probe_error_type", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "last_probe_error_message": { + "name": "last_probe_error_message", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "uniq_provider_endpoints_vendor_type_url": { + "name": "uniq_provider_endpoints_vendor_type_url", + "columns": [ + { + "expression": "vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "url", + "isExpression": false, + 
"asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "where": "\"provider_endpoints\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoints_vendor_type": { + "name": "idx_provider_endpoints_vendor_type", + "columns": [ + { + "expression": "vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"provider_endpoints\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoints_enabled": { + "name": "idx_provider_endpoints_enabled", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"provider_endpoints\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoints_pick_enabled": { + "name": "idx_provider_endpoints_pick_enabled", + "columns": [ + { + "expression": "vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "sort_order", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"provider_endpoints\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoints_created_at": { + "name": "idx_provider_endpoints_created_at", + 
"columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_endpoints_deleted_at": { + "name": "idx_provider_endpoints_deleted_at", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "provider_endpoints_vendor_id_provider_vendors_id_fk": { + "name": "provider_endpoints_vendor_id_provider_vendors_id_fk", + "tableFrom": "provider_endpoints", + "tableTo": "provider_vendors", + "columnsFrom": [ + "vendor_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.provider_groups": { + "name": "provider_groups", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(200)", + "primaryKey": false, + "notNull": true + }, + "cost_multiplier": { + "name": "cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": true, + "default": "'1.0'" + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "provider_groups_name_unique": { + "name": "provider_groups_name_unique", + "nullsNotDistinct": false, 
+ "columns": [ + "name" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.provider_vendors": { + "name": "provider_vendors", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "website_domain": { + "name": "website_domain", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "display_name": { + "name": "display_name", + "type": "varchar(200)", + "primaryKey": false, + "notNull": false + }, + "website_url": { + "name": "website_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "favicon_url": { + "name": "favicon_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "uniq_provider_vendors_website_domain": { + "name": "uniq_provider_vendors_website_domain", + "columns": [ + { + "expression": "website_domain", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_provider_vendors_created_at": { + "name": "idx_provider_vendors_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.providers": { + "name": "providers", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": 
"name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "url": { + "name": "url", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "provider_vendor_id": { + "name": "provider_vendor_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "weight": { + "name": "weight", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 1 + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "group_priorities": { + "name": "group_priorities", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'null'::jsonb" + }, + "cost_multiplier": { + "name": "cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": false, + "default": "'1.0'" + }, + "group_tag": { + "name": "group_tag", + "type": "varchar(255)", + "primaryKey": false, + "notNull": false + }, + "provider_type": { + "name": "provider_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'claude'" + }, + "preserve_client_ip": { + "name": "preserve_client_ip", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "disable_session_reuse": { + "name": "disable_session_reuse", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "model_redirects": { + "name": "model_redirects", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "allowed_models": { + "name": "allowed_models", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'null'::jsonb" + }, + 
"allowed_clients": { + "name": "allowed_clients", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "blocked_clients": { + "name": "blocked_clients", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "active_time_start": { + "name": "active_time_start", + "type": "varchar(5)", + "primaryKey": false, + "notNull": false + }, + "active_time_end": { + "name": "active_time_end", + "type": "varchar(5)", + "primaryKey": false, + "notNull": false + }, + "codex_instructions_strategy": { + "name": "codex_instructions_strategy", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false, + "default": "'auto'" + }, + "mcp_passthrough_type": { + "name": "mcp_passthrough_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'none'" + }, + "mcp_passthrough_url": { + "name": "mcp_passthrough_url", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_usd": { + "name": "limit_5h_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_reset_mode": { + "name": "limit_5h_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'rolling'" + }, + "limit_daily_usd": { + "name": "limit_daily_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "daily_reset_mode": { + "name": "daily_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'fixed'" + }, + "daily_reset_time": { + "name": "daily_reset_time", + "type": "varchar(5)", + "primaryKey": false, + "notNull": true, + "default": "'00:00'" + }, + "limit_weekly_usd": { + "name": "limit_weekly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_monthly_usd": { + "name": "limit_monthly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + 
"notNull": false + }, + "limit_total_usd": { + "name": "limit_total_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "total_cost_reset_at": { + "name": "total_cost_reset_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "limit_concurrent_sessions": { + "name": "limit_concurrent_sessions", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "max_retry_attempts": { + "name": "max_retry_attempts", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "circuit_breaker_failure_threshold": { + "name": "circuit_breaker_failure_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 5 + }, + "circuit_breaker_open_duration": { + "name": "circuit_breaker_open_duration", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 1800000 + }, + "circuit_breaker_half_open_success_threshold": { + "name": "circuit_breaker_half_open_success_threshold", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 2 + }, + "proxy_url": { + "name": "proxy_url", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "proxy_fallback_to_direct": { + "name": "proxy_fallback_to_direct", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "custom_headers": { + "name": "custom_headers", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "first_byte_timeout_streaming_ms": { + "name": "first_byte_timeout_streaming_ms", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "streaming_idle_timeout_ms": { + "name": "streaming_idle_timeout_ms", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "request_timeout_non_streaming_ms": { + "name": "request_timeout_non_streaming_ms", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + 
"website_url": { + "name": "website_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "favicon_url": { + "name": "favicon_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "cache_ttl_preference": { + "name": "cache_ttl_preference", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "swap_cache_ttl_billing": { + "name": "swap_cache_ttl_billing", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "context_1m_preference": { + "name": "context_1m_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "codex_reasoning_effort_preference": { + "name": "codex_reasoning_effort_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "codex_reasoning_summary_preference": { + "name": "codex_reasoning_summary_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "codex_text_verbosity_preference": { + "name": "codex_text_verbosity_preference", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "codex_parallel_tool_calls_preference": { + "name": "codex_parallel_tool_calls_preference", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "codex_service_tier_preference": { + "name": "codex_service_tier_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "anthropic_max_tokens_preference": { + "name": "anthropic_max_tokens_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "anthropic_thinking_budget_preference": { + "name": "anthropic_thinking_budget_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "anthropic_adaptive_thinking": { + "name": "anthropic_adaptive_thinking", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'null'::jsonb" + }, + "gemini_google_search_preference": { + "name": 
"gemini_google_search_preference", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "tpm": { + "name": "tpm", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "rpm": { + "name": "rpm", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "rpd": { + "name": "rpd", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "cc": { + "name": "cc", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 0 + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_providers_enabled_priority": { + "name": "idx_providers_enabled_priority", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "weight", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"providers\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_group": { + "name": "idx_providers_group", + "columns": [ + { + "expression": "group_tag", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"providers\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_vendor_type_url_active": { + "name": "idx_providers_vendor_type_url_active", + "columns": [ + { + "expression": "provider_vendor_id", + "isExpression": 
false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "url", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"providers\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_created_at": { + "name": "idx_providers_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_deleted_at": { + "name": "idx_providers_deleted_at", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_vendor_type": { + "name": "idx_providers_vendor_type", + "columns": [ + { + "expression": "provider_vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"providers\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_providers_enabled_vendor_type": { + "name": "idx_providers_enabled_vendor_type", + "columns": [ + { + "expression": "provider_vendor_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "provider_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"providers\".\"deleted_at\" IS NULL AND \"providers\".\"is_enabled\" = true AND \"providers\".\"provider_vendor_id\" IS NOT NULL AND \"providers\".\"provider_vendor_id\" > 0", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + 
"providers_provider_vendor_id_provider_vendors_id_fk": { + "name": "providers_provider_vendor_id_provider_vendors_id_fk", + "tableFrom": "providers", + "tableTo": "provider_vendors", + "columnsFrom": [ + "provider_vendor_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "restrict", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.quota_boost_grants": { + "name": "quota_boost_grants", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "model_group_id": { + "name": "model_group_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "window": { + "name": "window", + "type": "boost_window", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "amount_usd": { + "name": "amount_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": true + }, + "valid_from": { + "name": "valid_from", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "valid_to": { + "name": "valid_to", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + }, + "note": { + "name": "note", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_by": { + "name": "created_by", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "quota_boost_grants_target_idx": { + "name": "quota_boost_grants_target_idx", + "columns": [ + { + "expression": "user_id", 
+ "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "model_group_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "window", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "quota_boost_grants_valid_to_idx": { + "name": "quota_boost_grants_valid_to_idx", + "columns": [ + { + "expression": "valid_to", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "quota_boost_grants_user_id_users_id_fk": { + "name": "quota_boost_grants_user_id_users_id_fk", + "tableFrom": "quota_boost_grants", + "tableTo": "users", + "columnsFrom": [ + "user_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + }, + "quota_boost_grants_model_group_id_model_groups_id_fk": { + "name": "quota_boost_grants_model_group_id_model_groups_id_fk", + "tableFrom": "quota_boost_grants", + "tableTo": "model_groups", + "columnsFrom": [ + "model_group_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.request_filters": { + "name": "request_filters", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "scope": { + "name": "scope", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true + }, + "action": { + "name": "action", + "type": "varchar(30)", + "primaryKey": false, + "notNull": true + }, + "match_type": 
{ + "name": "match_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "target": { + "name": "target", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "replacement": { + "name": "replacement", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "priority": { + "name": "priority", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 0 + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "binding_type": { + "name": "binding_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'global'" + }, + "provider_ids": { + "name": "provider_ids", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "group_tags": { + "name": "group_tags", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "rule_mode": { + "name": "rule_mode", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'simple'" + }, + "execution_phase": { + "name": "execution_phase", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'guard'" + }, + "operations": { + "name": "operations", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "idx_request_filters_enabled": { + "name": "idx_request_filters_enabled", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "priority", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + 
"idx_request_filters_scope": { + "name": "idx_request_filters_scope", + "columns": [ + { + "expression": "scope", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_request_filters_action": { + "name": "idx_request_filters_action", + "columns": [ + { + "expression": "action", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_request_filters_binding": { + "name": "idx_request_filters_binding", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "binding_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_request_filters_phase": { + "name": "idx_request_filters_phase", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "execution_phase", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sensitive_words": { + "name": "sensitive_words", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "word": { + "name": "word", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "match_type": { + "name": "match_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'contains'" + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "is_enabled": { + 
"name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": { + "idx_sensitive_words_enabled": { + "name": "idx_sensitive_words_enabled", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "match_type", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_sensitive_words_created_at": { + "name": "idx_sensitive_words_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.system_settings": { + "name": "system_settings", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "site_title": { + "name": "site_title", + "type": "varchar(128)", + "primaryKey": false, + "notNull": true, + "default": "'Claude Code Hub'" + }, + "allow_global_usage_view": { + "name": "allow_global_usage_view", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "currency_display": { + "name": "currency_display", + "type": "varchar(10)", + "primaryKey": false, + "notNull": true, + "default": "'USD'" + }, + "billing_model_source": { + "name": "billing_model_source", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + 
"default": "'original'" + }, + "codex_priority_billing_source": { + "name": "codex_priority_billing_source", + "type": "varchar(20)", + "primaryKey": false, + "notNull": true, + "default": "'requested'" + }, + "bill_non_successful_requests": { + "name": "bill_non_successful_requests", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "bill_hedge_losers": { + "name": "bill_hedge_losers", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "timezone": { + "name": "timezone", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "enable_auto_cleanup": { + "name": "enable_auto_cleanup", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "cleanup_retention_days": { + "name": "cleanup_retention_days", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 30 + }, + "cleanup_schedule": { + "name": "cleanup_schedule", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false, + "default": "'0 2 * * *'" + }, + "cleanup_batch_size": { + "name": "cleanup_batch_size", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 10000 + }, + "enable_client_version_check": { + "name": "enable_client_version_check", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "verbose_provider_error": { + "name": "verbose_provider_error", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "pass_through_upstream_error_message": { + "name": "pass_through_upstream_error_message", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_http2": { + "name": "enable_http2", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "enable_openai_responses_websocket": { + "name": "enable_openai_responses_websocket", + "type": "boolean", + "primaryKey": false, + "notNull": 
true, + "default": true + }, + "enable_high_concurrency_mode": { + "name": "enable_high_concurrency_mode", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "intercept_anthropic_warmup_requests": { + "name": "intercept_anthropic_warmup_requests", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "enable_thinking_signature_rectifier": { + "name": "enable_thinking_signature_rectifier", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_thinking_budget_rectifier": { + "name": "enable_thinking_budget_rectifier", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_billing_header_rectifier": { + "name": "enable_billing_header_rectifier", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_response_input_rectifier": { + "name": "enable_response_input_rectifier", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "allow_non_conversation_endpoint_provider_fallback": { + "name": "allow_non_conversation_endpoint_provider_fallback", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "fake_streaming_whitelist": { + "name": "fake_streaming_whitelist", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "enable_codex_session_id_completion": { + "name": "enable_codex_session_id_completion", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_claude_metadata_user_id_injection": { + "name": "enable_claude_metadata_user_id_injection", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "enable_response_fixer": { + "name": "enable_response_fixer", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "response_fixer_config": { + "name": "response_fixer_config", + "type": 
"jsonb", + "primaryKey": false, + "notNull": false, + "default": "'{\"fixTruncatedJson\":true,\"fixSseFormat\":true,\"fixEncoding\":true,\"maxJsonDepth\":200,\"maxFixSize\":1048576}'::jsonb" + }, + "quota_db_refresh_interval_seconds": { + "name": "quota_db_refresh_interval_seconds", + "type": "integer", + "primaryKey": false, + "notNull": false, + "default": 10 + }, + "quota_lease_percent_5h": { + "name": "quota_lease_percent_5h", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.05'" + }, + "quota_lease_percent_daily": { + "name": "quota_lease_percent_daily", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.05'" + }, + "quota_lease_percent_weekly": { + "name": "quota_lease_percent_weekly", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.05'" + }, + "quota_lease_percent_monthly": { + "name": "quota_lease_percent_monthly", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false, + "default": "'0.05'" + }, + "quota_lease_cap_usd": { + "name": "quota_lease_cap_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "quota_model_lease_percent_5h": { + "name": "quota_model_lease_percent_5h", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false + }, + "quota_model_lease_percent_daily": { + "name": "quota_model_lease_percent_daily", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false + }, + "quota_model_lease_percent_weekly": { + "name": "quota_model_lease_percent_weekly", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false + }, + "quota_model_lease_percent_monthly": { + "name": "quota_model_lease_percent_monthly", + "type": "numeric(5, 4)", + "primaryKey": false, + "notNull": false + }, + "quota_model_lease_min_slice_usd": { + "name": "quota_model_lease_min_slice_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "ip_extraction_config": { 
+ "name": "ip_extraction_config", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "ip_geo_lookup_enabled": { + "name": "ip_geo_lookup_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "public_status_window_hours": { + "name": "public_status_window_hours", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 24 + }, + "public_status_aggregation_interval_minutes": { + "name": "public_status_aggregation_interval_minutes", + "type": "integer", + "primaryKey": false, + "notNull": true, + "default": 5 + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.usage_ledger": { + "name": "usage_ledger", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "request_id": { + "name": "request_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "user_id": { + "name": "user_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "key": { + "name": "key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "provider_id": { + "name": "provider_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "final_provider_id": { + "name": "final_provider_id", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "model": { + "name": "model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "original_model": { + "name": "original_model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": 
false + }, + "actual_response_model": { + "name": "actual_response_model", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "endpoint": { + "name": "endpoint", + "type": "varchar(256)", + "primaryKey": false, + "notNull": false + }, + "api_type": { + "name": "api_type", + "type": "varchar(20)", + "primaryKey": false, + "notNull": false + }, + "session_id": { + "name": "session_id", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "status_code": { + "name": "status_code", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "is_success": { + "name": "is_success", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": false + }, + "success_rate_outcome": { + "name": "success_rate_outcome", + "type": "varchar(16)", + "primaryKey": false, + "notNull": false + }, + "blocked_by": { + "name": "blocked_by", + "type": "varchar(50)", + "primaryKey": false, + "notNull": false + }, + "cost_usd": { + "name": "cost_usd", + "type": "numeric(21, 15)", + "primaryKey": false, + "notNull": false, + "default": "'0'" + }, + "cost_multiplier": { + "name": "cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": false + }, + "group_cost_multiplier": { + "name": "group_cost_multiplier", + "type": "numeric(10, 4)", + "primaryKey": false, + "notNull": false + }, + "input_tokens": { + "name": "input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "output_tokens": { + "name": "output_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_creation_input_tokens": { + "name": "cache_creation_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_read_input_tokens": { + "name": "cache_read_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_creation_5m_input_tokens": { + "name": "cache_creation_5m_input_tokens", + "type": "bigint", + "primaryKey": 
false, + "notNull": false + }, + "cache_creation_1h_input_tokens": { + "name": "cache_creation_1h_input_tokens", + "type": "bigint", + "primaryKey": false, + "notNull": false + }, + "cache_ttl_applied": { + "name": "cache_ttl_applied", + "type": "varchar(10)", + "primaryKey": false, + "notNull": false + }, + "context_1m_applied": { + "name": "context_1m_applied", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "swap_cache_ttl_applied": { + "name": "swap_cache_ttl_applied", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "duration_ms": { + "name": "duration_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "ttfb_ms": { + "name": "ttfb_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "client_ip": { + "name": "client_ip", + "type": "varchar(45)", + "primaryKey": false, + "notNull": false + }, + "counted_in_user_global": { + "name": "counted_in_user_global", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "counted_in_key_global": { + "name": "counted_in_key_global", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "idx_usage_ledger_request_id": { + "name": "idx_usage_ledger_request_id", + "columns": [ + { + "expression": "request_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_user_created_at": { + "name": "idx_usage_ledger_user_created_at", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": 
"\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_key_created_at": { + "name": "idx_usage_ledger_key_created_at", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_provider_created_at": { + "name": "idx_usage_ledger_provider_created_at", + "columns": [ + { + "expression": "final_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_created_at_minute": { + "name": "idx_usage_ledger_created_at_minute", + "columns": [ + { + "expression": "date_trunc('minute', \"created_at\" AT TIME ZONE 'UTC')", + "asc": true, + "isExpression": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_created_at_desc_id": { + "name": "idx_usage_ledger_created_at_desc_id", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_session_id": { + "name": "idx_usage_ledger_session_id", + "columns": [ + { + "expression": "session_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"session_id\" IS NOT NULL", + "concurrently": 
false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_model": { + "name": "idx_usage_ledger_model", + "columns": [ + { + "expression": "model", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"model\" IS NOT NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_key_cost": { + "name": "idx_usage_ledger_key_cost", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "endpoint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_user_cost_cover": { + "name": "idx_usage_ledger_user_cost_cover", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "endpoint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_provider_cost_cover": { + "name": "idx_usage_ledger_provider_cost_cover", + "columns": [ + { + "expression": "final_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "cost_usd", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + 
{ + "expression": "endpoint", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_usage_ledger_key_created_at_desc_cover": { + "name": "idx_usage_ledger_key_created_at_desc_cover", + "columns": [ + { + "expression": "key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "\"created_at\" DESC NULLS LAST", + "asc": true, + "isExpression": true, + "nulls": "last" + }, + { + "expression": "final_provider_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"usage_ledger\".\"blocked_by\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.user_groups": { + "name": "user_groups", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "tag": { + "name": "tag", + "type": "varchar(255)", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(128)", + "primaryKey": false, + "notNull": false + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "user_groups_tag_idx": { + "name": "user_groups_tag_idx", + "columns": [ + { + "expression": "tag", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", 
+ "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.users": { + "name": "users", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "description": { + "name": "description", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "role": { + "name": "role", + "type": "varchar", + "primaryKey": false, + "notNull": false, + "default": "'user'" + }, + "rpm_limit": { + "name": "rpm_limit", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "daily_limit_usd": { + "name": "daily_limit_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "provider_group": { + "name": "provider_group", + "type": "varchar(200)", + "primaryKey": false, + "notNull": false, + "default": "'default'" + }, + "tags": { + "name": "tags", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'[]'::jsonb" + }, + "limit_5h_usd": { + "name": "limit_5h_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_5h_reset_mode": { + "name": "limit_5h_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'rolling'" + }, + "limit_weekly_usd": { + "name": "limit_weekly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_monthly_usd": { + "name": "limit_monthly_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "limit_total_usd": { + "name": "limit_total_usd", + "type": "numeric(10, 2)", + "primaryKey": false, + "notNull": false + }, + "cost_reset_at": { + "name": "cost_reset_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + 
"limit_5h_cost_reset_at": { + "name": "limit_5h_cost_reset_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "limit_concurrent_sessions": { + "name": "limit_concurrent_sessions", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "daily_reset_mode": { + "name": "daily_reset_mode", + "type": "daily_reset_mode", + "typeSchema": "public", + "primaryKey": false, + "notNull": true, + "default": "'fixed'" + }, + "daily_reset_time": { + "name": "daily_reset_time", + "type": "varchar(5)", + "primaryKey": false, + "notNull": true, + "default": "'00:00'" + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "allowed_clients": { + "name": "allowed_clients", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'[]'::jsonb" + }, + "allowed_models": { + "name": "allowed_models", + "type": "jsonb", + "primaryKey": false, + "notNull": false, + "default": "'[]'::jsonb" + }, + "blocked_clients": { + "name": "blocked_clients", + "type": "jsonb", + "primaryKey": false, + "notNull": true, + "default": "'[]'::jsonb" + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "idx_users_active_role_sort": { + "name": "idx_users_active_role_sort", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "role", + "isExpression": false, + "asc": 
true, + "nulls": "last" + }, + { + "expression": "id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"users\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_users_enabled_expires_at": { + "name": "idx_users_enabled_expires_at", + "columns": [ + { + "expression": "is_enabled", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"users\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_users_tags_gin": { + "name": "idx_users_tags_gin", + "columns": [ + { + "expression": "tags", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "where": "\"users\".\"deleted_at\" IS NULL", + "concurrently": false, + "method": "gin", + "with": {} + }, + "idx_users_created_at": { + "name": "idx_users_created_at", + "columns": [ + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "idx_users_deleted_at": { + "name": "idx_users_deleted_at", + "columns": [ + { + "expression": "deleted_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.webhook_targets": { + "name": "webhook_targets", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "serial", + "primaryKey": true, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar(100)", + "primaryKey": false, + "notNull": true + }, + "provider_type": { + "name": "provider_type", + "type": 
"webhook_provider_type", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "webhook_url": { + "name": "webhook_url", + "type": "varchar(1024)", + "primaryKey": false, + "notNull": false + }, + "telegram_bot_token": { + "name": "telegram_bot_token", + "type": "varchar(256)", + "primaryKey": false, + "notNull": false + }, + "telegram_chat_id": { + "name": "telegram_chat_id", + "type": "varchar(64)", + "primaryKey": false, + "notNull": false + }, + "dingtalk_secret": { + "name": "dingtalk_secret", + "type": "varchar(256)", + "primaryKey": false, + "notNull": false + }, + "custom_template": { + "name": "custom_template", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "custom_headers": { + "name": "custom_headers", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "proxy_url": { + "name": "proxy_url", + "type": "varchar(512)", + "primaryKey": false, + "notNull": false + }, + "proxy_fallback_to_direct": { + "name": "proxy_fallback_to_direct", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": false + }, + "is_enabled": { + "name": "is_enabled", + "type": "boolean", + "primaryKey": false, + "notNull": true, + "default": true + }, + "last_test_at": { + "name": "last_test_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "last_test_result": { + "name": "last_test_result", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false, + "default": "now()" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.boost_window": { + 
"name": "boost_window", + "schema": "public", + "values": [ + "5h", + "daily", + "weekly", + "monthly", + "total" + ] + }, + "public.daily_reset_mode": { + "name": "daily_reset_mode", + "schema": "public", + "values": [ + "fixed", + "rolling" + ] + }, + "public.limit_subject": { + "name": "limit_subject", + "schema": "public", + "values": [ + "user", + "key", + "user_group" + ] + }, + "public.notification_type": { + "name": "notification_type", + "schema": "public", + "values": [ + "circuit_breaker", + "daily_leaderboard", + "cost_alert", + "cache_hit_rate_alert" + ] + }, + "public.webhook_provider_type": { + "name": "webhook_provider_type", + "schema": "public", + "values": [ + "wechat", + "feishu", + "dingtalk", + "telegram", + "custom" + ] + } + }, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 7d08f0bbd..40aad8095 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -736,6 +736,13 @@ "when": 1780575810214, "tag": "0104_watery_thunderbird", "breakpoints": true + }, + { + "idx": 105, + "version": "7", + "when": 1780982269105, + "tag": "0105_broad_crystal", + "breakpoints": true } ] } \ No newline at end of file diff --git a/messages/en/errors.json b/messages/en/errors.json index 0e9d3a4ac..579a97f5c 100644 --- a/messages/en/errors.json +++ b/messages/en/errors.json @@ -70,6 +70,12 @@ "RATE_LIMIT_CONCURRENT_SESSIONS_EXCEEDED": "Concurrent sessions limit exceeded: {current} sessions (limit: {limit}). Please wait for active sessions to complete", "RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "Daily quota exceeded: ${current} USD (limit: ${limit} USD). Resets at {resetTime}", "RATE_LIMIT_DAILY_ROLLING_EXCEEDED": "24-hour rolling window cost limit exceeded: ${current} USD (limit: ${limit} USD). 
Usage gradually expires over the past 24 hours", + "MODEL_RATE_LIMIT_5H_EXCEEDED": "Model {model} 5-hour cost limit exceeded: ${current} USD (limit: ${limit} USD)", + "MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "Model {model} daily quota exceeded: ${current} USD (limit: ${limit} USD)", + "MODEL_RATE_LIMIT_WEEKLY_EXCEEDED": "Model {model} weekly cost limit exceeded: ${current} USD (limit: ${limit} USD)", + "MODEL_RATE_LIMIT_MONTHLY_EXCEEDED": "Model {model} monthly cost limit exceeded: ${current} USD (limit: ${limit} USD)", + "MODEL_RATE_LIMIT_TOTAL_EXCEEDED": "Model {model} total spending limit exceeded: ${current} / ${limit} USD", + "MODEL_RATE_LIMIT_UNAVAILABLE": "Model {model} rate-limit verification is temporarily unavailable; request rejected (fail-closed)", "USER_NOT_FOUND": "User not found", "USER_CANNOT_MODIFY_SENSITIVE_FIELDS": "Regular users cannot modify quota limits and provider groups", diff --git a/messages/en/myUsage.json b/messages/en/myUsage.json index 851da6c1d..335fed098 100644 --- a/messages/en/myUsage.json +++ b/messages/en/myUsage.json @@ -27,7 +27,10 @@ "keyLevel": "Key", "userLevel": "User", "unlimited": "Unlimited", - "empty": "No quota data" + "empty": "No quota data", + "countedInGlobalLabel": "Global", + "modelGroupOnlyLabel": "Model group", + "splitNote": "Cost split: this amount falls under a model-group limit and is counted separately from the global quota." }, "logs": { "title": "Usage Logs", @@ -125,6 +128,11 @@ "monthly": "Monthly", "total": "Total" }, + "modelGroupQuota": { + "sectionTitle": "Model Group Quotas", + "modelsLabel": "Models", + "note": "These model groups are limited separately; matching usage is metered against the model-group budget and is not counted toward the global quota above."
+ }, "logsCollapsible": { "title": "Usage Logs", "lastStatus": "Last: {code} ({time})", diff --git a/messages/en/quota.json b/messages/en/quota.json index 124ab271c..bb0f503ac 100644 --- a/messages/en/quota.json +++ b/messages/en/quota.json @@ -57,8 +57,86 @@ "description": "View and manage quota usage across all levels", "tabs": { "users": "User Quotas", + "modelLimits": "Per-Model Limits", + "modelGroups": "Model Groups", + "userGroups": "User Groups", "keys": "Key Quotas", "providers": "Provider Quotas" + }, + "subTabs": { + "config": "Limit Config", + "modelGroups": "Model Groups", + "userGroups": "User Groups" + } + }, + "modelLimits": { + "title": "Per-Model-Group Limits", + "description": "Set cost caps per subject (user, user group) for a specific model group. When a request matches a model-group limit, that limit fully replaces the global per-user cap for that request.", + "disabledNotice": "Per-model limits are not active yet. Set ENABLE_MODEL_RATE_LIMIT=true (requires ENABLE_RATE_LIMIT=true) to enable them. You can configure them ahead of time.", + "loadError": "Failed to load model-group limits", + "deleteSuccess": "Limit deleted", + "deleteError": "Failed to delete limit", + "noData": "No limits configured", + "allSubjects": "All subjects", + "allModelGroups": "All model groups", + "searchSubject": "Search...", + "searchModelGroup": "Search model group...", + "noResults": "No results found", + "addLimit": "Add limit", + "searchLimits": "Search subject or model group", + "expandMembers": "Expand / collapse members", + "subjectType": "Subject type", + "subjectTypeUser": "User", + "subjectTypeUserGroup": "User group", + "subjectTypeKey": "Key", + "modelGroup": "Model group", + "semanticsNote": "When a request matches a model-group limit, the global per-user or per-key quota is bypassed entirely for that model group (complete cost partition). RPM and concurrent-session limits continue to apply. User-group limits act as per-member ceilings.
When multiple sources define a limit for the same window, the most permissive (highest) value is used.", + "resetMode": { + "fixed": "Fixed window", + "rolling": "Rolling window" + }, + "table": { + "subjectType": "Subject type", + "subject": "Subject", + "modelGroup": "Model group", + "fiveHour": "5h", + "daily": "Daily", + "weekly": "Weekly", + "monthly": "Monthly", + "total": "Total", + "resetMode": "Reset mode", + "actions": "Actions" + }, + "deleteConfirm": { + "title": "Delete limit", + "description": "Delete the limit for model group \"{group}\"? This cannot be undone.", + "cancel": "Cancel", + "confirm": "Delete" + }, + "dialog": { + "addTitle": "Add model-group limit", + "editTitle": "Edit model-group limit", + "description": "Caps for model group \"{group}\". Leave a field empty to keep that window unlimited.", + "fiveHour": "5-hour limit", + "daily": "Daily limit", + "weekly": "Weekly limit", + "monthly": "Monthly limit", + "total": "Total limit", + "unlimited": "Unlimited", + "resetMode": "5-hour reset mode", + "resetModeFixed": "Fixed window", + "resetModeRolling": "Rolling window", + "save": "Save", + "saveSuccess": "Limit saved", + "saveError": "Failed to save limit", + "selectDescription": "Set cost caps for the selected subject and model group. Leave a field empty to keep that window unlimited.", + "subjectRequired": "Please select a subject", + "modelGroupRequired": "Please select a model group" + }, + "boosts": { + "panelTitle": "Quota Boosts", + "dialogDescription": "Grant temporary quota boosts for user \"{user}\" on model group \"{group}\".", + "configuredCount": "{count} quota boost(s) configured" } }, "users": { @@ -412,5 +490,147 @@ "saveFailed": "Update failed", "invalidNumber": "Please enter a valid number", "negativeNotAllowed": "Cannot be negative" - } + }, + "modelGroups": { + "title": "Model Group Management", + "description": "Create model groups for fine-grained rate limiting. 
Each model can belong to at most one group (global exclusivity).", + "semanticsNote": "When a request matches a model group limit, the global per-user or per-key quota does not apply to that request (full partition). RPM and concurrent session limits always remain active.", + "totalCount": "{count} groups", + "addGroup": "New Group", + "addSingleton": "Single Model Group", + "singleton": "Singleton", + "group": "Group", + "noData": "No model groups. Click \"New Group\" to create one.", + "noMembers": "No members", + "nameRequired": "Group name is required.", + "modelRequired": "Model name is required.", + "duplicateName": "A group with that name already exists.", + "memberConflict": "Model \"{model}\" already belongs to group \"{groupName}\" (id={groupId}).", + "loadError": "Failed to load model groups.", + "createSuccess": "Model group created.", + "createError": "Failed to create model group.", + "updateSuccess": "Model group updated.", + "updateError": "Failed to update model group.", + "deleteSuccess": "Model group \"{name}\" deleted.", + "deleteError": "Failed to delete model group.", + "memberUpdatePartialError": "Some member changes failed: {errors}", + "deleteConfirm": { + "title": "Delete model group?", + "description": "This will permanently delete group \"{name}\" and remove all its model mappings. This action cannot be undone.", + "cancel": "Cancel", + "confirm": "Delete" + }, + "table": { + "name": "Group Name", + "description": "Description", + "members": "Models", + "type": "Type", + "actions": "Actions" + }, + "dialog": { + "addTitle": "Create Model Group", + "addDescription": "Create a group to collect multiple models under one rate-limit policy.", + "addSingletonTitle": "Create Single-Model Group", + "addSingletonDescription": "Quickly wrap one model in a singleton group for per-model rate limiting.", + "manageTitle": "Edit Group: {name}", + "manageDescription": "Rename the group or adjust its member models. 
Members must be globally unique across all groups.", + "name": "Group Name", + "nameOptional": "optional", + "namePlaceholder": "e.g. gpt-4-family", + "namePlaceholderSingleton": "Defaults to model name", + "description": "Description", + "descriptionPlaceholder": "Optional description", + "model": "Model", + "modelPlaceholder": "Select or type model name", + "searchModel": "Search models...", + "noModels": "No models found", + "members": "Members", + "addMember": "Add", + "selectedCount": "{count} selected", + "memberConflictNote": "If a model already belongs to another group, adding it here will fail (global exclusivity).", + "cancel": "Cancel", + "save": "Save" + } + }, + "userGroups": { + "title": "User Group Management", + "description": "Register tags as user groups. Membership is derived from users whose tags contain the group tag.", + "loading": "Loading...", + "totalCount": "{count} groups registered", + "createGroup": "Create Group", + "noGroups": "No user groups registered yet", + "members": "members", + "tagRequired": "Please select a tag", + "duplicateTag": "This tag is already registered as a group", + "form": { + "tag": "Tag", + "tagPlaceholder": "Select a tag", + "noAvailableTags": "All tags are already registered", + "name": "Display Name", + "namePlaceholder": "Optional display name", + "description": "Description", + "descriptionPlaceholder": "Optional description", + "cancel": "Cancel", + "create": "Create", + "save": "Save", + "saving": "Saving..." + }, + "createDialog": { + "title": "Create User Group", + "description": "Select an existing user tag to register as a user group." + }, + "editDialog": { + "title": "Edit User Group", + "description": "Editing group: {tag}" + }, + "deleteDialog": { + "title": "Delete User Group", + "description": "Are you sure you want to delete the group \"{tag}\"? 
This will not modify any user tags.", + "confirm": "Delete" + } + }, + "quotaBoosts": { + "errors": { + "list_failed": "Failed to retrieve quota boost grants.", + "create_failed": "Failed to create quota boost grant.", + "delete_failed": "Failed to revoke quota boost grant.", + "invalid_validity_range": "The expiry time must be after the start time.", + "action_failed": "Quota boost operation failed." + }, + "note": "Boosts grant temporary extra capacity on top of the base limit for this user and model group. They activate exactly at validFrom and stack additively. Changes propagate within the cache TTL.", + "window": "Window", + "window_5h": "5-hour", + "window_daily": "Daily", + "window_weekly": "Weekly", + "window_monthly": "Monthly", + "window_total": "Total", + "amount": "Boost amount", + "validFrom": "Valid from", + "validTo": "Valid until", + "note_label": "Note", + "notePlaceholder": "Optional admin note", + "addBoost": "Add boost", + "noBoosts": "No boosts configured for this user and model group", + "createSuccess": "Quota boost added", + "deleteSuccess": "Quota boost revoked", + "invalidAmount": "Please enter a valid positive amount", + "validityRequired": "Please fill in both valid-from and valid-until", + "revokeConfirm": { + "title": "Revoke quota boost", + "description": "This will permanently revoke the quota boost. The effect propagates within the cache TTL.", + "cancel": "Cancel", + "confirm": "Revoke" + }, + "table": { + "window": "Window", + "amount": "Amount", + "validFrom": "Valid from", + "validTo": "Valid until", + "note": "Note", + "actions": "Actions" + } + }, + "countedInGlobalLabel": "Global", + "modelGroupOnlyLabel": "Model group", + "splitNote": "Cost split: this amount hits a model-group limit and is counted separately from the global quota." 
} diff --git a/messages/en/settings/providers/strings.json b/messages/en/settings/providers/strings.json index 288e734b5..4d57bf664 100644 --- a/messages/en/settings/providers/strings.json +++ b/messages/en/settings/providers/strings.json @@ -48,6 +48,16 @@ "viewModeList": "List", "viewModeVendor": "Vendor", "viewModeGroups": "Groups", + "viewModeModels": "Models", + "modelView": { + "search": "Search models...", + "modelCount": "{count} models", + "providerCount": "{count} providers", + "unrestricted": "Unrestricted", + "unrestrictedDesc": "These providers have no explicit model restrictions and will accept any model.", + "noModels": "No providers with specific model configurations found.", + "noResults": "No matching models found." + }, "endpoints": "Endpoints", "manualProbe": "Probe", "addEndpoint": "Add Endpoint", diff --git a/messages/ja/errors.json b/messages/ja/errors.json index b951cd5bb..80a974326 100644 --- a/messages/ja/errors.json +++ b/messages/ja/errors.json @@ -64,6 +64,12 @@ "RATE_LIMIT_WEEKLY_EXCEEDED": "週次コスト制限を超過しました:${current} USD(制限:${limit} USD)。{resetTime} にリセットされます", "RATE_LIMIT_MONTHLY_EXCEEDED": "月次コスト制限を超過しました:${current} USD(制限:${limit} USD)。{resetTime} にリセットされます", "RATE_LIMIT_TOTAL_EXCEEDED": "総支出制限を超過しました:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_5H_EXCEEDED": "モデル {model} の5時間コスト制限を超過しました:${current} USD(制限:${limit} USD)", + "MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "モデル {model} の日次クォータを超過しました:${current} USD(制限:${limit} USD)", + "MODEL_RATE_LIMIT_WEEKLY_EXCEEDED": "モデル {model} の週次コスト制限を超過しました:${current} USD(制限:${limit} USD)", + "MODEL_RATE_LIMIT_MONTHLY_EXCEEDED": "モデル {model} の月次コスト制限を超過しました:${current} USD(制限:${limit} USD)", + "MODEL_RATE_LIMIT_TOTAL_EXCEEDED": "モデル {model} の総支出制限を超過しました:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_UNAVAILABLE": "モデル {model} のレート制限チェックを一時的に実行できないため、リクエストを拒否しました(fail-closed)", "RATE_LIMIT_CONCURRENT_SESSIONS_EXCEEDED": "同時セッション制限を超過しました:現在 {current} セッション(制限:{limit})。アクティブなセッションが完了するまでお待ちください", 
"RESOURCE_BUSY": "リソースは現在使用中です", "INVALID_STATE": "現在の状態では操作が許可されていません", diff --git a/messages/ja/myUsage.json b/messages/ja/myUsage.json index 80761d0a8..a74264ae2 100644 --- a/messages/ja/myUsage.json +++ b/messages/ja/myUsage.json @@ -27,7 +27,10 @@ "keyLevel": "キー", "userLevel": "ユーザー", "unlimited": "無制限", - "empty": "クォータ情報がありません" + "empty": "クォータ情報がありません", + "countedInGlobalLabel": "グローバル枠計上", + "modelGroupOnlyLabel": "モデルグループ分算", + "splitNote": "モデルグループ制限に命中したコストは軸ごとに完全分割されます。グローバル枠には計上されず、モデルグループ予算内のみで計量されます。" }, "logs": { "title": "利用ログ", @@ -125,6 +128,11 @@ "monthly": "月次", "total": "合計" }, + "modelGroupQuota": { + "sectionTitle": "モデルグループのクォータ", + "modelsLabel": "モデル", + "note": "以下のモデルグループは個別に制限されています。該当する使用量はモデルグループの予算で計測され、上記のグローバルクォータには計上されません。" + }, "logsCollapsible": { "title": "使用ログ", "lastStatus": "最終: {code} ({time})", diff --git a/messages/ja/quota.json b/messages/ja/quota.json index bb6c51510..57140f426 100644 --- a/messages/ja/quota.json +++ b/messages/ja/quota.json @@ -57,8 +57,86 @@ "description": "すべてのレベルでクォータ使用状況を表示および管理", "tabs": { "users": "ユーザークォータ", + "modelLimits": "モデル別上限", + "modelGroups": "モデルグループ", + "userGroups": "ユーザーグループ", "keys": "キークォータ", "providers": "プロバイダークォータ" + }, + "subTabs": { + "config": "上限設定", + "modelGroups": "モデルグループ", + "userGroups": "ユーザーグループ" + } + }, + "modelLimits": { + "title": "モデルグループ別上限", + "description": "対象(ユーザー、ユーザーグループ)とモデルグループごとにコスト上限を設定します。モデルグループ上限に達した場合、そのリクエストはグローバルユーザー上限から完全に切り離されます。", + "disabledNotice": "モデル別上限は現在有効ではありません。環境変数 ENABLE_MODEL_RATE_LIMIT=true(ENABLE_RATE_LIMIT=true が前提)を設定すると有効になります。設定は事前に行えます。", + "loadError": "モデルグループ上限の読み込みに失敗しました", + "deleteSuccess": "上限を削除しました", + "deleteError": "上限の削除に失敗しました", + "noData": "上限設定はありません", + "allSubjects": "すべての対象", + "allModelGroups": "すべてのモデルグループ", + "searchSubject": "検索...", + "searchModelGroup": "モデルグループを検索...", + "noResults": "一致なし", + "addLimit": "上限を追加", + "searchLimits": "対象 / モデルグループを検索", + "expandMembers": "メンバーの展開 / 折りたたみ", + 
"subjectType": "対象種別", + "subjectTypeUser": "ユーザー", + "subjectTypeUserGroup": "ユーザーグループ", + "subjectTypeKey": "キー", + "modelGroup": "モデルグループ", + "semanticsNote": "モデルグループ上限に達した場合、そのリクエストはグローバルユーザー/キー配額から完全に切り離されます(完全分離)。RPM と同時セッション制限は引き続き有効です。ユーザーグループ上限はメンバーごとに独立して適用されます。同一ウィンドウに複数のソースがある場合は最も緩い値(最大値)が使用されます。", + "resetMode": { + "fixed": "固定ウィンドウ", + "rolling": "ローリングウィンドウ" + }, + "table": { + "subjectType": "対象種別", + "subject": "対象", + "modelGroup": "モデルグループ", + "fiveHour": "5時間", + "daily": "日次", + "weekly": "週次", + "monthly": "月次", + "total": "合計", + "resetMode": "リセットモード", + "actions": "操作" + }, + "deleteConfirm": { + "title": "上限を削除", + "description": "モデルグループ \"{group}\" の上限設定を削除しますか?この操作は取り消せません。", + "cancel": "キャンセル", + "confirm": "削除" + }, + "dialog": { + "addTitle": "モデルグループ上限を追加", + "editTitle": "モデルグループ上限を編集", + "description": "モデルグループ \"{group}\" のコスト上限。空欄にするとその期間は無制限になります。", + "fiveHour": "5時間上限", + "daily": "日次上限", + "weekly": "週次上限", + "monthly": "月次上限", + "total": "合計上限", + "unlimited": "無制限", + "resetMode": "5時間リセットモード", + "resetModeFixed": "固定ウィンドウ", + "resetModeRolling": "ローリングウィンドウ", + "save": "保存", + "saveSuccess": "上限を保存しました", + "saveError": "上限の保存に失敗しました", + "selectDescription": "選択した対象とモデルグループにコスト上限を設定します。空欄にするとその期間は無制限になります。", + "subjectRequired": "対象を選択してください", + "modelGroupRequired": "モデルグループを選択してください" + }, + "boosts": { + "panelTitle": "一時クォータブースト", + "dialogDescription": "ユーザー「{user}」のモデルグループ「{group}」に一時的な上限引き上げを付与します。", + "configuredCount": "一時提額を {count} 件設定済み" } }, "users": { @@ -389,5 +467,147 @@ "saveFailed": "更新に失敗しました", "invalidNumber": "有効な数値を入力してください", "negativeNotAllowed": "負の値は入力できません" - } + }, + "modelGroups": { + "title": "モデルグループ管理", + "description": "モデルグループを作成して、きめ細かいレート制限を設定します。各モデルは1つのグループにのみ所属できます(グローバル排他)。", + "semanticsNote": "リクエストがモデルグループの制限に一致すると、そのリクエストに対してユーザーまたはKeyのグローバルコスト制限は適用されません(完全分離)。RPMと同時接続制限は常に有効です。", + "totalCount": "{count} グループ", + "addGroup": "グループを作成", + "addSingleton": "単一モデルグループ", + 
"singleton": "シングルトン", + "group": "グループ", + "noData": "モデルグループがありません。「グループを作成」をクリックしてください。", + "noMembers": "メンバーなし", + "nameRequired": "グループ名は必須です。", + "modelRequired": "モデル名は必須です。", + "duplicateName": "同名のグループが既に存在します。", + "memberConflict": "モデル「{model}」は既にグループ「{groupName}」(id={groupId})に所属しています。", + "loadError": "モデルグループの読み込みに失敗しました。", + "createSuccess": "モデルグループを作成しました。", + "createError": "モデルグループの作成に失敗しました。", + "updateSuccess": "モデルグループを更新しました。", + "updateError": "モデルグループの更新に失敗しました。", + "deleteSuccess": "モデルグループ「{name}」を削除しました。", + "deleteError": "モデルグループの削除に失敗しました。", + "memberUpdatePartialError": "一部のメンバー変更が失敗しました:{errors}", + "deleteConfirm": { + "title": "モデルグループを削除しますか?", + "description": "グループ「{name}」とすべてのモデルマッピングが完全に削除されます。この操作は元に戻せません。", + "cancel": "キャンセル", + "confirm": "削除" + }, + "table": { + "name": "グループ名", + "description": "説明", + "members": "モデル", + "type": "種類", + "actions": "操作" + }, + "dialog": { + "addTitle": "モデルグループを作成", + "addDescription": "複数のモデルをグループにまとめ、統一的なレート制限ポリシーを設定します。", + "addSingletonTitle": "単一モデルグループを作成", + "addSingletonDescription": "1つのモデルを素早くシングルトングループにラップして、モデル別レート制限を実現します。", + "manageTitle": "グループを編集:{name}", + "manageDescription": "グループ名の変更またはメンバーモデルの調整。モデルはシステム全体で1つのグループにのみ所属できます。", + "name": "グループ名", + "nameOptional": "任意", + "namePlaceholder": "例:gpt-4-family", + "namePlaceholderSingleton": "デフォルトはモデル名", + "description": "説明", + "descriptionPlaceholder": "任意の説明", + "model": "モデル", + "modelPlaceholder": "モデル名を選択または入力", + "searchModel": "モデルを検索...", + "noModels": "モデルが見つかりません", + "members": "メンバーモデル", + "addMember": "追加", + "selectedCount": "{count} 件選択中", + "memberConflictNote": "モデルが既に別のグループに所属している場合、追加は失敗します(グローバル排他)。", + "cancel": "キャンセル", + "save": "保存" + } + }, + "userGroups": { + "title": "ユーザーグループ管理", + "description": "タグをユーザーグループとして登録します。メンバーシップはユーザーのタグから派生します。", + "loading": "読み込み中...", + "totalCount": "{count} 個のグループを登録済み", + "createGroup": "グループを作成", + "noGroups": "ユーザーグループが未登録です", + "members": "人のメンバー", + 
"tagRequired": "タグを選択してください", + "duplicateTag": "このタグはすでにグループとして登録されています", + "form": { + "tag": "タグ", + "tagPlaceholder": "タグを選択", + "noAvailableTags": "すべてのタグが登録済みです", + "name": "表示名", + "namePlaceholder": "任意の表示名", + "description": "説明", + "descriptionPlaceholder": "任意の説明", + "cancel": "キャンセル", + "create": "作成", + "save": "保存", + "saving": "保存中..." + }, + "createDialog": { + "title": "ユーザーグループを作成", + "description": "既存のユーザータグを選択してグループとして登録します。" + }, + "editDialog": { + "title": "ユーザーグループを編集", + "description": "グループを編集中:{tag}" + }, + "deleteDialog": { + "title": "ユーザーグループを削除", + "description": "グループ「{tag}」を削除しますか?ユーザーのタグは変更されません。", + "confirm": "削除" + } + }, + "quotaBoosts": { + "errors": { + "list_failed": "一時クォータ付与の一覧取得に失敗しました。", + "create_failed": "一時クォータ付与の作成に失敗しました。", + "delete_failed": "一時クォータ付与の取り消しに失敗しました。", + "invalid_validity_range": "終了日時は開始日時より後でなければなりません。", + "action_failed": "一時クォータ操作に失敗しました。" + }, + "note": "ブーストは validFrom の時刻に有効化され、このユーザーとモデルグループの基本上限に加算されます。変更はキャッシュ TTL 以内に反映されます。", + "window": "ウィンドウ", + "window_5h": "5時間", + "window_daily": "日次", + "window_weekly": "週次", + "window_monthly": "月次", + "window_total": "合計", + "amount": "ブースト金額", + "validFrom": "有効開始日時", + "validTo": "有効終了日時", + "note_label": "メモ", + "notePlaceholder": "管理者メモ(任意)", + "addBoost": "ブーストを追加", + "noBoosts": "このユーザーとモデルグループのブーストはありません", + "createSuccess": "クォータブーストを追加しました", + "deleteSuccess": "クォータブーストを取り消しました", + "invalidAmount": "有効な正の金額を入力してください", + "validityRequired": "有効開始日時と終了日時を入力してください", + "revokeConfirm": { + "title": "クォータブーストを取り消す", + "description": "このクォータブーストを永久に取り消します。効果はキャッシュ TTL 以内に伝播します。", + "cancel": "キャンセル", + "confirm": "取り消す" + }, + "table": { + "window": "ウィンドウ", + "amount": "金額", + "validFrom": "有効開始", + "validTo": "有効終了", + "note": "メモ", + "actions": "操作" + } + }, + "countedInGlobalLabel": "グローバル枠計上", + "modelGroupOnlyLabel": "モデルグループ分算", + "splitNote": "モデルグループ制限に命中したコストは軸ごとに完全分割されます。グローバル枠には計上されず、モデルグループ予算内のみで計量されます。" } diff --git 
a/messages/ja/settings/providers/strings.json b/messages/ja/settings/providers/strings.json index a2a510c99..0af2f36fe 100644 --- a/messages/ja/settings/providers/strings.json +++ b/messages/ja/settings/providers/strings.json @@ -48,6 +48,16 @@ "viewModeList": "リスト", "viewModeVendor": "ベンダー", "viewModeGroups": "グループ", + "viewModeModels": "モデル", + "modelView": { + "search": "モデルを検索...", + "modelCount": "{count} 件のモデル", + "providerCount": "{count} 件のプロバイダー", + "unrestricted": "無制限", + "unrestrictedDesc": "これらのプロバイダーはモデルの制限がなく、任意のモデルリクエストを受け付けます。", + "noModels": "特定のモデルが設定されたプロバイダーが見つかりません。", + "noResults": "一致するモデルが見つかりません。" + }, "endpoints": "エンドポイント", "manualProbe": "テスト", "addEndpoint": "エンドポイントを追加", diff --git a/messages/ru/errors.json b/messages/ru/errors.json index 87562a3ef..27a473f97 100644 --- a/messages/ru/errors.json +++ b/messages/ru/errors.json @@ -64,6 +64,12 @@ "RATE_LIMIT_WEEKLY_EXCEEDED": "Превышен недельный лимит расходов: ${current} USD (лимит: ${limit} USD). Сброс в {resetTime}", "RATE_LIMIT_MONTHLY_EXCEEDED": "Превышен месячный лимит расходов: ${current} USD (лимит: ${limit} USD). 
Сброс в {resetTime}", "RATE_LIMIT_TOTAL_EXCEEDED": "Превышен общий лимит расходов: ${current} / ${limit} USD", + "MODEL_RATE_LIMIT_5H_EXCEEDED": "Модель {model}: превышен 5-часовой лимит расходов: ${current} USD (лимит: ${limit} USD)", + "MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "Модель {model}: превышена дневная квота: ${current} USD (лимит: ${limit} USD)", + "MODEL_RATE_LIMIT_WEEKLY_EXCEEDED": "Модель {model}: превышен недельный лимит расходов: ${current} USD (лимит: ${limit} USD)", + "MODEL_RATE_LIMIT_MONTHLY_EXCEEDED": "Модель {model}: превышен месячный лимит расходов: ${current} USD (лимит: ${limit} USD)", + "MODEL_RATE_LIMIT_TOTAL_EXCEEDED": "Модель {model}: превышен общий лимит расходов: ${current} / ${limit} USD", + "MODEL_RATE_LIMIT_UNAVAILABLE": "Проверка лимита для модели {model} временно недоступна; запрос отклонён (fail-closed)", "RATE_LIMIT_CONCURRENT_SESSIONS_EXCEEDED": "Превышен лимит одновременных сессий: {current} сессий (лимит: {limit}). Пожалуйста, дождитесь завершения активных сессий", "RESOURCE_BUSY": "Ресурс в настоящее время используется", "INVALID_STATE": "Операция не разрешена в текущем состоянии", diff --git a/messages/ru/myUsage.json b/messages/ru/myUsage.json index c85dd01ea..6aea79295 100644 --- a/messages/ru/myUsage.json +++ b/messages/ru/myUsage.json @@ -27,7 +27,10 @@ "keyLevel": "Ключ", "userLevel": "Пользователь", "unlimited": "Без лимита", - "empty": "Нет данных о лимитах" + "empty": "Нет данных о лимитах", + "countedInGlobalLabel": "В глобальном лимите", + "modelGroupOnlyLabel": "Только группа моделей", + "splitNote": "Расходы, попавшие под лимит группы моделей, учитываются отдельно: они не входят в глобальный лимит и рассчитываются только в рамках бюджета группы моделей." 
}, "logs": { "title": "Журнал использования", @@ -125,6 +128,11 @@ "monthly": "Месяц", "total": "Всего" }, + "modelGroupQuota": { + "sectionTitle": "Квоты групп моделей", + "modelsLabel": "Модели", + "note": "Эти группы моделей ограничены отдельно; соответствующее потребление учитывается в бюджете группы моделей и не засчитывается в глобальную квоту выше." + }, "logsCollapsible": { "title": "Журнал запросов", "lastStatus": "Посл.: {code} ({time})", diff --git a/messages/ru/quota.json b/messages/ru/quota.json index 0ebe777cc..bc2a2451b 100644 --- a/messages/ru/quota.json +++ b/messages/ru/quota.json @@ -57,8 +57,86 @@ "description": "Просмотр и управление использованием квот на всех уровнях", "tabs": { "users": "Квоты пользователей", + "modelLimits": "Лимиты по моделям", + "modelGroups": "Группы моделей", + "userGroups": "Группы пользователей", "keys": "Квоты ключей", "providers": "Квоты провайдеров" + }, + "subTabs": { + "config": "Настройка лимитов", + "modelGroups": "Группы моделей", + "userGroups": "Группы пользователей" + } + }, + "modelLimits": { + "title": "Лимиты по группам моделей", + "description": "Установите лимиты расходов для субъекта (пользователя, группы пользователей) по группе моделей. При достижении лимита группы моделей запрос полностью выходит из глобального пользовательского лимита.", + "disabledNotice": "Лимиты по моделям сейчас не активны. Установите ENABLE_MODEL_RATE_LIMIT=true (требуется ENABLE_RATE_LIMIT=true), чтобы включить их. 
Настроить можно заранее.", + "loadError": "Не удалось загрузить лимиты по группам моделей", + "deleteSuccess": "Лимит удалён", + "deleteError": "Не удалось удалить лимит", + "noData": "Лимиты не настроены", + "allSubjects": "Все субъекты", + "allModelGroups": "Все группы моделей", + "searchSubject": "Поиск...", + "searchModelGroup": "Поиск группы моделей...", + "noResults": "Нет результатов", + "addLimit": "Добавить лимит", + "searchLimits": "Поиск субъекта или группы моделей", + "expandMembers": "Развернуть / свернуть участников", + "subjectType": "Тип субъекта", + "subjectTypeUser": "Пользователь", + "subjectTypeUserGroup": "Группа пользователей", + "subjectTypeKey": "Ключ", + "modelGroup": "Группа моделей", + "semanticsNote": "При достижении лимита группы моделей запрос полностью отделяется от глобальной квоты пользователя/ключа (полное разделение). Ограничения RPM и параллельных сессий продолжают действовать. Лимиты группы пользователей применяются отдельно для каждого участника. При наличии нескольких источников для одного окна используется наибольшее значение.", + "resetMode": { + "fixed": "Фиксированное окно", + "rolling": "Скользящее окно" + }, + "table": { + "subjectType": "Тип субъекта", + "subject": "Субъект", + "modelGroup": "Группа моделей", + "fiveHour": "5 ч", + "daily": "День", + "weekly": "Неделя", + "monthly": "Месяц", + "total": "Всего", + "resetMode": "Режим сброса", + "actions": "Действия" + }, + "deleteConfirm": { + "title": "Удалить лимит", + "description": "Удалить настройку лимита для группы моделей \"{group}\"? Это действие нельзя отменить.", + "cancel": "Отмена", + "confirm": "Удалить" + }, + "dialog": { + "addTitle": "Добавить лимит группы моделей", + "editTitle": "Изменить лимит группы моделей", + "description": "Лимиты расходов для группы моделей \"{group}\". 
Оставьте поле пустым, чтобы не ограничивать период.", + "fiveHour": "Лимит за 5 часов", + "daily": "Дневной лимит", + "weekly": "Недельный лимит", + "monthly": "Месячный лимит", + "total": "Общий лимит", + "unlimited": "Без ограничений", + "resetMode": "Режим сброса 5 часов", + "resetModeFixed": "Фиксированное окно", + "resetModeRolling": "Скользящее окно", + "save": "Сохранить", + "saveSuccess": "Лимит сохранён", + "saveError": "Не удалось сохранить лимит", + "selectDescription": "Задайте лимиты затрат для выбранного субъекта и группы моделей. Оставьте поле пустым, чтобы окно осталось без ограничений.", + "subjectRequired": "Выберите субъект", + "modelGroupRequired": "Выберите группу моделей" + }, + "boosts": { + "panelTitle": "Временные надбавки к квоте", + "dialogDescription": "Выдача временного увеличения квоты для пользователя «{user}» в группе моделей «{group}».", + "configuredCount": "Настроено надбавок: {count}" } }, "users": { @@ -412,5 +490,147 @@ "saveFailed": "Ошибка обновления", "invalidNumber": "Введите корректное число", "negativeNotAllowed": "Не может быть отрицательным" - } + }, + "modelGroups": { + "title": "Управление группами моделей", + "description": "Создавайте группы моделей для точного управления лимитами. Каждая модель может принадлежать только одной группе (глобальная уникальность).", + "semanticsNote": "Когда запрос попадает под лимит группы моделей, глобальный лимит стоимости для пользователя или ключа на этот запрос не применяется (полное разделение). Лимиты RPM и одновременных сессий всегда активны.", + "totalCount": "{count} группы", + "addGroup": "Создать группу", + "addSingleton": "Группа одной модели", + "singleton": "Одиночная", + "group": "Групповая", + "noData": "Групп моделей нет. 
Нажмите «Создать группу».", + "noMembers": "Нет участников", + "nameRequired": "Название группы обязательно.", + "modelRequired": "Название модели обязательно.", + "duplicateName": "Группа с таким именем уже существует.", + "memberConflict": "Модель «{model}» уже принадлежит группе «{groupName}» (id={groupId}).", + "loadError": "Не удалось загрузить группы моделей.", + "createSuccess": "Группа моделей создана.", + "createError": "Не удалось создать группу моделей.", + "updateSuccess": "Группа моделей обновлена.", + "updateError": "Не удалось обновить группу моделей.", + "deleteSuccess": "Группа моделей «{name}» удалена.", + "deleteError": "Не удалось удалить группу моделей.", + "memberUpdatePartialError": "Некоторые изменения участников не удались: {errors}", + "deleteConfirm": { + "title": "Удалить группу моделей?", + "description": "Группа «{name}» и все её маппинги моделей будут удалены безвозвратно.", + "cancel": "Отмена", + "confirm": "Удалить" + }, + "table": { + "name": "Название группы", + "description": "Описание", + "members": "Модели", + "type": "Тип", + "actions": "Действия" + }, + "dialog": { + "addTitle": "Создать группу моделей", + "addDescription": "Объедините несколько моделей в группу с единой политикой лимитов.", + "addSingletonTitle": "Создать группу одной модели", + "addSingletonDescription": "Быстро оберните одну модель в группу-одиночку для постановки лимитов на уровне модели.", + "manageTitle": "Редактировать группу: {name}", + "manageDescription": "Переименуйте группу или измените список моделей. 
Модель может принадлежать только одной группе в системе.", + "name": "Название группы", + "nameOptional": "необязательно", + "namePlaceholder": "например, gpt-4-family", + "namePlaceholderSingleton": "По умолчанию — название модели", + "description": "Описание", + "descriptionPlaceholder": "Необязательное описание", + "model": "Модель", + "modelPlaceholder": "Выберите или введите модель", + "searchModel": "Поиск моделей...", + "noModels": "Модели не найдены", + "members": "Модели-участники", + "addMember": "Добавить", + "selectedCount": "Выбрано: {count}", + "memberConflictNote": "Если модель уже принадлежит другой группе, добавление завершится ошибкой.", + "cancel": "Отмена", + "save": "Сохранить" + } + }, + "userGroups": { + "title": "Управление группами пользователей", + "description": "Регистрируйте теги как группы пользователей. Членство определяется на основе тегов пользователей.", + "loading": "Загрузка...", + "totalCount": "Зарегистрировано групп: {count}", + "createGroup": "Создать группу", + "noGroups": "Групп пользователей не создано", + "members": "участников", + "tagRequired": "Выберите тег", + "duplicateTag": "Этот тег уже зарегистрирован как группа", + "form": { + "tag": "Тег", + "tagPlaceholder": "Выберите тег", + "noAvailableTags": "Все теги уже зарегистрированы", + "name": "Отображаемое имя", + "namePlaceholder": "Необязательное имя", + "description": "Описание", + "descriptionPlaceholder": "Необязательное описание", + "cancel": "Отмена", + "create": "Создать", + "save": "Сохранить", + "saving": "Сохранение..." + }, + "createDialog": { + "title": "Создать группу пользователей", + "description": "Выберите существующий тег пользователя для регистрации в качестве группы." + }, + "editDialog": { + "title": "Редактировать группу пользователей", + "description": "Редактирование группы: {tag}" + }, + "deleteDialog": { + "title": "Удалить группу пользователей", + "description": "Удалить группу \"{tag}\"? 
Теги пользователей не будут изменены.", + "confirm": "Удалить" + } + }, + "quotaBoosts": { + "errors": { + "list_failed": "Не удалось получить список временных надбавок к квоте.", + "create_failed": "Не удалось создать временную надбавку к квоте.", + "delete_failed": "Не удалось отозвать временную надбавку к квоте.", + "invalid_validity_range": "Время окончания должно быть позже времени начала.", + "action_failed": "Операция с временной надбавкой к квоте завершилась ошибкой." + }, + "note": "Надбавки активируются в момент validFrom и суммируются поверх базового лимита для данного пользователя и группы моделей. Изменения применяются в течение TTL кэша.", + "window": "Окно", + "window_5h": "5 часов", + "window_daily": "Дневное", + "window_weekly": "Недельное", + "window_monthly": "Месячное", + "window_total": "Итого", + "amount": "Сумма надбавки", + "validFrom": "Действует с", + "validTo": "Действует до", + "note_label": "Примечание", + "notePlaceholder": "Примечание администратора (необязательно)", + "addBoost": "Добавить надбавку", + "noBoosts": "Надбавки для этого пользователя и группы моделей не настроены", + "createSuccess": "Надбавка к квоте добавлена", + "deleteSuccess": "Надбавка к квоте отозвана", + "invalidAmount": "Введите допустимую положительную сумму", + "validityRequired": "Заполните время начала и окончания действия", + "revokeConfirm": { + "title": "Отозвать надбавку к квоте", + "description": "Надбавка к квоте будет безвозвратно отозвана. 
Изменение применится в течение TTL кэша.", + "cancel": "Отмена", + "confirm": "Отозвать" + }, + "table": { + "window": "Окно", + "amount": "Сумма", + "validFrom": "Действует с", + "validTo": "Действует до", + "note": "Примечание", + "actions": "Действия" + } + }, + "countedInGlobalLabel": "В глобальном лимите", + "modelGroupOnlyLabel": "Только группа моделей", + "splitNote": "Расходы, попавшие под лимит группы моделей, учитываются отдельно: они не входят в глобальный лимит и рассчитываются только в рамках бюджета группы моделей." } diff --git a/messages/ru/settings/providers/strings.json b/messages/ru/settings/providers/strings.json index 766820c04..db26df1f6 100644 --- a/messages/ru/settings/providers/strings.json +++ b/messages/ru/settings/providers/strings.json @@ -48,6 +48,16 @@ "viewModeList": "Список", "viewModeVendor": "Вендор", "viewModeGroups": "Группы", + "viewModeModels": "Модели", + "modelView": { + "search": "Поиск моделей...", + "modelCount": "{count} моделей", + "providerCount": "{count} провайдеров", + "unrestricted": "Без ограничений", + "unrestrictedDesc": "У этих провайдеров нет ограничений по моделям — они принимают любые запросы.", + "noModels": "Провайдеры с конкретными моделями не найдены.", + "noResults": "Совпадающих моделей не найдено." 
+ }, "endpoints": "Эндпоинты", "manualProbe": "Проверка", "addEndpoint": "Добавить эндпоинт", diff --git a/messages/zh-CN/errors.json b/messages/zh-CN/errors.json index b8d037487..accde6bd5 100644 --- a/messages/zh-CN/errors.json +++ b/messages/zh-CN/errors.json @@ -64,6 +64,12 @@ "RATE_LIMIT_WEEKLY_EXCEEDED": "周消费超限:当前 ${current} USD(限制:${limit} USD)。将于 {resetTime} 重置", "RATE_LIMIT_MONTHLY_EXCEEDED": "月消费超限:当前 ${current} USD(限制:${limit} USD)。将于 {resetTime} 重置", "RATE_LIMIT_TOTAL_EXCEEDED": "总消费上限已达到:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_5H_EXCEEDED": "模型 {model} 5小时消费超限:当前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "模型 {model} 每日额度超限:当前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_WEEKLY_EXCEEDED": "模型 {model} 周消费超限:当前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_MONTHLY_EXCEEDED": "模型 {model} 月消费超限:当前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_TOTAL_EXCEEDED": "模型 {model} 总消费上限已达到:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_UNAVAILABLE": "模型 {model} 限额校验暂不可用,已按 fail-closed 策略拒绝请求", "RATE_LIMIT_CONCURRENT_SESSIONS_EXCEEDED": "并发 Session 超限:当前 {current} 个(限制:{limit} 个)。请等待活跃 Session 完成", "RESOURCE_BUSY": "资源正在使用中", "INVALID_STATE": "当前状态不允许此操作", diff --git a/messages/zh-CN/myUsage.json b/messages/zh-CN/myUsage.json index 5b230d9df..f6d8931b6 100644 --- a/messages/zh-CN/myUsage.json +++ b/messages/zh-CN/myUsage.json @@ -27,7 +27,10 @@ "keyLevel": "密钥", "userLevel": "用户", "unlimited": "不限", - "empty": "暂无额度数据" + "empty": "暂无额度数据", + "countedInGlobalLabel": "计入全局额", + "modelGroupOnlyLabel": "模型组单算", + "splitNote": "命中模型组限额的消费按轴完全切分:不计入该轴全局额度,仅在模型组预算内单独计量。" }, "logs": { "title": "使用日志", @@ -125,6 +128,11 @@ "monthly": "月", "total": "总计" }, + "modelGroupQuota": { + "sectionTitle": "模型组配额", + "modelsLabel": "模型", + "note": "以下模型组已单独限额;命中后该轴消费按模型组预算单独计量,不计入上方全局额度。" + }, "logsCollapsible": { "title": "使用日志", "lastStatus": "最近: {code} ({time})", diff --git a/messages/zh-CN/quota.json 
b/messages/zh-CN/quota.json index 63918d285..2b8be5e1c 100644 --- a/messages/zh-CN/quota.json +++ b/messages/zh-CN/quota.json @@ -57,8 +57,86 @@ "description": "查看和管理所有层级的限额使用情况", "tabs": { "users": "用户限额", + "modelLimits": "按模型限额", + "modelGroups": "模型组", + "userGroups": "用户组", "keys": "密钥限额", "providers": "供应商限额" + }, + "subTabs": { + "config": "限额配置", + "modelGroups": "模型组", + "userGroups": "用户组" + } + }, + "modelLimits": { + "title": "按模型组限额", + "description": "为指定主体(用户、用户组)和模型组设置成本上限。命中模型组限额时,该请求完全脱离全局用户限额计算。", + "disabledNotice": "按模型限额当前未启用,需设置环境变量 ENABLE_MODEL_RATE_LIMIT=true(依赖 ENABLE_RATE_LIMIT=true)后生效,配置可提前完成。", + "loadError": "加载模型组限额失败", + "deleteSuccess": "已删除限额", + "deleteError": "删除限额失败", + "noData": "暂无限额配置", + "allSubjects": "所有主体", + "allModelGroups": "所有模型组", + "searchSubject": "搜索...", + "searchModelGroup": "搜索模型组...", + "noResults": "无匹配结果", + "addLimit": "添加限额", + "searchLimits": "搜索主体 / 模型组", + "expandMembers": "展开 / 收起成员", + "subjectType": "主体类型", + "subjectTypeUser": "用户", + "subjectTypeUserGroup": "用户组", + "subjectTypeKey": "密钥", + "modelGroup": "模型组", + "semanticsNote": "命中模型组限额时,该请求完全脱离全局用户/密钥配额(完全分区),RPM 和并发会话限制仍然有效。用户组限额对每个成员单独生效。同一窗口有多个来源时取最宽松(最大值)。", + "resetMode": { + "fixed": "固定窗口", + "rolling": "滚动窗口" + }, + "table": { + "subjectType": "主体类型", + "subject": "主体", + "modelGroup": "模型组", + "fiveHour": "5 小时", + "daily": "每日", + "weekly": "每周", + "monthly": "每月", + "total": "总计", + "resetMode": "重置模式", + "actions": "操作" + }, + "deleteConfirm": { + "title": "删除限额", + "description": "确定要删除模型组 \"{group}\" 的限额配置吗?此操作无法撤销。", + "cancel": "取消", + "confirm": "删除" + }, + "dialog": { + "addTitle": "新增模型组限额", + "editTitle": "编辑模型组限额", + "description": "模型组 \"{group}\" 的成本上限,留空表示该周期不限制。", + "fiveHour": "5 小时限额", + "daily": "每日限额", + "weekly": "每周限额", + "monthly": "每月限额", + "total": "总限额", + "unlimited": "不限制", + "resetMode": "5 小时重置模式", + "resetModeFixed": "固定窗口", + "resetModeRolling": "滚动窗口", + "save": "保存", + "saveSuccess": "限额已保存", + 
"saveError": "保存限额失败", + "selectDescription": "为所选主体与模型组设置成本上限,留空表示该周期不限制。", + "subjectRequired": "请选择主体", + "modelGroupRequired": "请选择模型组" + }, + "boosts": { + "panelTitle": "临时提额", + "dialogDescription": "为用户「{user}」在模型组「{group}」上发放临时额度提升。", + "configuredCount": "已配置 {count} 项临时提额" } }, "users": { @@ -412,5 +490,147 @@ "saveFailed": "更新失败", "invalidNumber": "请输入有效数字", "negativeNotAllowed": "不能为负数" - } + }, + "modelGroups": { + "title": "模型组管理", + "description": "创建模型组,用于精细化限额控制。每个模型只能属于一个组(全局互斥)。", + "semanticsNote": "请求命中模型组限额后,该轴(用户或 Key)的全局成本限额对该请求不再生效(完全切分)。RPM 与并发限制始终生效。", + "totalCount": "{count} 个组", + "addGroup": "新建组", + "addSingleton": "单模型快捷组", + "singleton": "单模型", + "group": "多模型", + "noData": "暂无模型组,点击「新建组」创建。", + "noMembers": "无成员", + "nameRequired": "组名不能为空。", + "modelRequired": "模型名不能为空。", + "duplicateName": "同名组已存在。", + "memberConflict": "模型「{model}」已属于组「{groupName}」(id={groupId}),每个模型只能属于一个组。", + "loadError": "加载模型组失败。", + "createSuccess": "模型组创建成功。", + "createError": "创建模型组失败。", + "updateSuccess": "模型组更新成功。", + "updateError": "更新模型组失败。", + "deleteSuccess": "已删除模型组「{name}」。", + "deleteError": "删除模型组失败。", + "memberUpdatePartialError": "部分成员变更失败:{errors}", + "deleteConfirm": { + "title": "确认删除模型组?", + "description": "将永久删除组「{name}」及其所有模型映射,此操作不可撤销。", + "cancel": "取消", + "confirm": "删除" + }, + "table": { + "name": "组名", + "description": "描述", + "members": "模型成员", + "type": "类型", + "actions": "操作" + }, + "dialog": { + "addTitle": "新建模型组", + "addDescription": "将多个模型归入一个组,统一配置限额策略。", + "addSingletonTitle": "新建单模型快捷组", + "addSingletonDescription": "为单个模型快速创建一个单元素组,实现按模型限额。", + "manageTitle": "编辑组:{name}", + "manageDescription": "重命名或调整成员模型。模型在全系统只能属于一个组。", + "name": "组名", + "nameOptional": "可选", + "namePlaceholder": "如 gpt-4-family", + "namePlaceholderSingleton": "默认使用模型名", + "description": "描述", + "descriptionPlaceholder": "可选描述", + "model": "模型", + "modelPlaceholder": "选择或输入模型名", + "searchModel": "搜索模型...", + "noModels": "未找到模型", + "members": 
"成员模型", + "addMember": "添加", + "selectedCount": "已选 {count} 个模型", + "memberConflictNote": "若模型已属于其他组,添加将失败(全局互斥)。", + "cancel": "取消", + "save": "保存" + } + }, + "userGroups": { + "title": "用户组管理", + "description": "将标签注册为用户组,成员资格由用户标签派生。", + "loading": "加载中...", + "totalCount": "已注册 {count} 个用户组", + "createGroup": "创建用户组", + "noGroups": "暂无用户组", + "members": "名成员", + "tagRequired": "请选择标签", + "duplicateTag": "该标签已注册为用户组", + "form": { + "tag": "标签", + "tagPlaceholder": "选择标签", + "noAvailableTags": "所有标签已注册", + "name": "显示名称", + "namePlaceholder": "可选显示名称", + "description": "描述", + "descriptionPlaceholder": "可选描述", + "cancel": "取消", + "create": "创建", + "save": "保存", + "saving": "保存中..." + }, + "createDialog": { + "title": "创建用户组", + "description": "选择一个已有用户标签注册为用户组。" + }, + "editDialog": { + "title": "编辑用户组", + "description": "正在编辑用户组:{tag}" + }, + "deleteDialog": { + "title": "删除用户组", + "description": "确定删除用户组 '{tag}' 吗?此操作不会修改任何用户的标签。", + "confirm": "删除" + } + }, + "quotaBoosts": { + "errors": { + "list_failed": "获取临时提额授予列表失败。", + "create_failed": "创建临时提额授予失败。", + "delete_failed": "撤销临时提额授予失败。", + "invalid_validity_range": "到期时间必须晚于开始时间。", + "action_failed": "临时提额操作失败。" + }, + "note": "提额在 validFrom 时刻激活,在该用户和模型组的基础限额上累加叠加。变更在缓存 TTL 内生效。", + "window": "时间窗口", + "window_5h": "5 小时", + "window_daily": "每日", + "window_weekly": "每周", + "window_monthly": "每月", + "window_total": "总计", + "amount": "提额金额", + "validFrom": "生效时间", + "validTo": "到期时间", + "note_label": "备注", + "notePlaceholder": "管理员备注(可选)", + "addBoost": "添加提额", + "noBoosts": "该用户与模型组暂无提额配置", + "createSuccess": "提额已添加", + "deleteSuccess": "提额已撤销", + "invalidAmount": "请输入合法的正数金额", + "validityRequired": "请填写生效时间和到期时间", + "revokeConfirm": { + "title": "撤销临时提额", + "description": "将永久撤销该提额授予,效果在缓存 TTL 内传播。", + "cancel": "取消", + "confirm": "撤销" + }, + "table": { + "window": "时间窗口", + "amount": "金额", + "validFrom": "生效时间", + "validTo": "到期时间", + "note": "备注", + "actions": "操作" + } + }, + "countedInGlobalLabel": 
"计入全局额", + "modelGroupOnlyLabel": "模型组单算", + "splitNote": "命中模型组限额的消费按轴完全切分:不计入该轴全局额度,仅在模型组预算内单独计量。" } diff --git a/messages/zh-CN/settings/providers/strings.json b/messages/zh-CN/settings/providers/strings.json index d202008ab..084690c8e 100644 --- a/messages/zh-CN/settings/providers/strings.json +++ b/messages/zh-CN/settings/providers/strings.json @@ -48,6 +48,16 @@ "viewModeList": "列表", "viewModeVendor": "服务商", "viewModeGroups": "分组", + "viewModeModels": "模型", + "modelView": { + "search": "搜索模型...", + "modelCount": "{count} 个模型", + "providerCount": "{count} 个供应商", + "unrestricted": "无限制", + "unrestrictedDesc": "这些供应商未配置具体的模型限制,接受任意模型请求。", + "noModels": "未找到配置了具体模型的供应商。", + "noResults": "未找到匹配的模型。" + }, "endpoints": "服务端点", "manualProbe": "测速", "addEndpoint": "添加端点", diff --git a/messages/zh-TW/errors.json b/messages/zh-TW/errors.json index 07d104c72..ac6aae670 100644 --- a/messages/zh-TW/errors.json +++ b/messages/zh-TW/errors.json @@ -64,6 +64,12 @@ "RATE_LIMIT_WEEKLY_EXCEEDED": "週消費超限:當前 ${current} USD(限制:${limit} USD)。將於 {resetTime} 重置", "RATE_LIMIT_MONTHLY_EXCEEDED": "月消費超限:當前 ${current} USD(限制:${limit} USD)。將於 {resetTime} 重置", "RATE_LIMIT_TOTAL_EXCEEDED": "總消費上限已達到:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_5H_EXCEEDED": "模型 {model} 5小時消費超限:當前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_DAILY_QUOTA_EXCEEDED": "模型 {model} 每日額度超限:當前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_WEEKLY_EXCEEDED": "模型 {model} 週消費超限:當前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_MONTHLY_EXCEEDED": "模型 {model} 月消費超限:當前 ${current} USD(限制:${limit} USD)", + "MODEL_RATE_LIMIT_TOTAL_EXCEEDED": "模型 {model} 總消費上限已達到:${current} / ${limit} USD", + "MODEL_RATE_LIMIT_UNAVAILABLE": "模型 {model} 限額校驗暫不可用,已依 fail-closed 策略拒絕請求", "RATE_LIMIT_CONCURRENT_SESSIONS_EXCEEDED": "並發 Session 超限:當前 {current} 個(限制:{limit} 個)。請等待活躍 Session 完成", "RESOURCE_BUSY": "資源正在使用中", "INVALID_STATE": "當前狀態不允許此操作", diff --git a/messages/zh-TW/myUsage.json b/messages/zh-TW/myUsage.json 
index 41be1b6e8..9055939c1 100644 --- a/messages/zh-TW/myUsage.json +++ b/messages/zh-TW/myUsage.json @@ -27,7 +27,10 @@ "keyLevel": "金鑰", "userLevel": "使用者", "unlimited": "不限制", - "empty": "暫無額度資料" + "empty": "暫無額度資料", + "countedInGlobalLabel": "計入全局額", + "modelGroupOnlyLabel": "模型組單算", + "splitNote": "命中模型組限額的消費按軸完全切分:不計入該軸全局額度,僅在模型組預算內單獨計量。" }, "logs": { "title": "使用紀錄", @@ -125,6 +128,11 @@ "monthly": "每月", "total": "總計" }, + "modelGroupQuota": { + "sectionTitle": "模型組配額", + "modelsLabel": "模型", + "note": "以下模型組已單獨限額;命中後該軸消費按模型組預算單獨計量,不計入上方全域額度。" + }, "logsCollapsible": { "title": "使用記錄", "lastStatus": "最近:{code}({time})", diff --git a/messages/zh-TW/quota.json b/messages/zh-TW/quota.json index bfa7a99fc..605dbe584 100644 --- a/messages/zh-TW/quota.json +++ b/messages/zh-TW/quota.json @@ -57,8 +57,86 @@ "description": "查看和管理所有層級的限額使用情況", "tabs": { "users": "用戶限額", + "modelLimits": "按模型限額", + "modelGroups": "模型組", + "userGroups": "用戶組", "keys": "密鑰限額", "providers": "供應商限額" + }, + "subTabs": { + "config": "限額設定", + "modelGroups": "模型組", + "userGroups": "用戶組" + } + }, + "modelLimits": { + "title": "按模型組限額", + "description": "為指定主體(使用者、使用者組)和模型組設定成本上限。命中模型組限額時,該請求完全脫離全域使用者限額計算。", + "disabledNotice": "按模型限額目前未啟用,需設定環境變數 ENABLE_MODEL_RATE_LIMIT=true(依賴 ENABLE_RATE_LIMIT=true)後生效,組態可提前完成。", + "loadError": "載入模型組限額失敗", + "deleteSuccess": "已刪除限額", + "deleteError": "刪除限額失敗", + "noData": "尚無限額組態", + "allSubjects": "所有主體", + "allModelGroups": "所有模型組", + "searchSubject": "搜尋...", + "searchModelGroup": "搜尋模型組...", + "noResults": "無符合結果", + "addLimit": "新增限額", + "searchLimits": "搜尋主體 / 模型組", + "expandMembers": "展開 / 收合成員", + "subjectType": "主體類型", + "subjectTypeUser": "使用者", + "subjectTypeUserGroup": "使用者組", + "subjectTypeKey": "密鑰", + "modelGroup": "模型組", + "semanticsNote": "命中模型組限額時,該請求完全脫離全域使用者/密鑰配額(完全分區),RPM 和並行連線限制仍然有效。使用者組限額對每個成員單獨生效。同一視窗有多個來源時取最寬鬆(最大值)。", + "resetMode": { + "fixed": "固定視窗", + "rolling": "滾動視窗" + }, + "table": { + "subjectType": "主體類型", + "subject": 
"主體", + "modelGroup": "模型組", + "fiveHour": "5 小時", + "daily": "每日", + "weekly": "每週", + "monthly": "每月", + "total": "總計", + "resetMode": "重置模式", + "actions": "操作" + }, + "deleteConfirm": { + "title": "刪除限額", + "description": "確定要刪除模型組 \"{group}\" 的限額組態嗎?此操作無法復原。", + "cancel": "取消", + "confirm": "刪除" + }, + "dialog": { + "addTitle": "新增模型組限額", + "editTitle": "編輯模型組限額", + "description": "模型組 \"{group}\" 的成本上限,留空表示該週期不限制。", + "fiveHour": "5 小時限額", + "daily": "每日限額", + "weekly": "每週限額", + "monthly": "每月限額", + "total": "總限額", + "unlimited": "不限制", + "resetMode": "5 小時重置模式", + "resetModeFixed": "固定視窗", + "resetModeRolling": "滾動視窗", + "save": "儲存", + "saveSuccess": "限額已儲存", + "saveError": "儲存限額失敗", + "selectDescription": "為所選主體與模型組設定成本上限,留空表示該週期不限制。", + "subjectRequired": "請選擇主體", + "modelGroupRequired": "請選擇模型組" + }, + "boosts": { + "panelTitle": "臨時提額", + "dialogDescription": "為使用者「{user}」在模型組「{group}」上發放臨時額度提升。", + "configuredCount": "已設定 {count} 項臨時提額" } }, "users": { @@ -387,5 +465,147 @@ "saveFailed": "更新失敗", "invalidNumber": "請輸入有效數字", "negativeNotAllowed": "不能為負數" - } + }, + "modelGroups": { + "title": "模型組管理", + "description": "建立模型組,用於細緻的限額控制。每個模型只能屬於一個組(全域互斥)。", + "semanticsNote": "請求命中模型組限額後,該軸(使用者或 Key)的全域成本限額對該請求不再生效(完全切分)。RPM 與並發限制仍然生效。", + "totalCount": "{count} 個組", + "addGroup": "新建組", + "addSingleton": "單模型快捷組", + "singleton": "單模型", + "group": "多模型", + "noData": "暫無模型組,點擊「新建組」建立。", + "noMembers": "無成員", + "nameRequired": "組名不能為空。", + "modelRequired": "模型名不能為空。", + "duplicateName": "同名組已存在。", + "memberConflict": "模型「{model}」已屬於組「{groupName}」(id={groupId}),每個模型只能屬於一個組。", + "loadError": "載入模型組失敗。", + "createSuccess": "模型組建立成功。", + "createError": "建立模型組失敗。", + "updateSuccess": "模型組更新成功。", + "updateError": "更新模型組失敗。", + "deleteSuccess": "已刪除模型組「{name}」。", + "deleteError": "刪除模型組失敗。", + "memberUpdatePartialError": "部分成員變更失敗:{errors}", + "deleteConfirm": { + "title": "確認刪除模型組?", + "description": "將永久刪除組「{name}」及其所有模型映射,此操作無法復原。", + "cancel": "取消", + 
"confirm": "刪除" + }, + "table": { + "name": "組名", + "description": "描述", + "members": "模型成員", + "type": "類型", + "actions": "操作" + }, + "dialog": { + "addTitle": "新建模型組", + "addDescription": "將多個模型歸入一個組,統一配置限額策略。", + "addSingletonTitle": "新建單模型快捷組", + "addSingletonDescription": "為單個模型快速建立一個單元素組,實現按模型限額。", + "manageTitle": "編輯組:{name}", + "manageDescription": "重新命名或調整成員模型。模型在全系統只能屬於一個組。", + "name": "組名", + "nameOptional": "可選", + "namePlaceholder": "如 gpt-4-family", + "namePlaceholderSingleton": "預設使用模型名", + "description": "描述", + "descriptionPlaceholder": "可選描述", + "model": "模型", + "modelPlaceholder": "選擇或輸入模型名", + "searchModel": "搜尋模型...", + "noModels": "未找到模型", + "members": "成員模型", + "addMember": "新增", + "selectedCount": "已選 {count} 個模型", + "memberConflictNote": "若模型已屬於其他組,新增將失敗(全域互斥)。", + "cancel": "取消", + "save": "儲存" + } + }, + "userGroups": { + "title": "用戶群組管理", + "description": "將標籤註冊為用戶群組,成員資格由用戶標籤派生。", + "loading": "載入中...", + "totalCount": "已註冊 {count} 個用戶群組", + "createGroup": "建立用戶群組", + "noGroups": "尚無用戶群組", + "members": "位成員", + "tagRequired": "請選擇標籤", + "duplicateTag": "該標籤已註冊為用戶群組", + "form": { + "tag": "標籤", + "tagPlaceholder": "選擇標籤", + "noAvailableTags": "所有標籤已註冊", + "name": "顯示名稱", + "namePlaceholder": "可選顯示名稱", + "description": "描述", + "descriptionPlaceholder": "可選描述", + "cancel": "取消", + "create": "建立", + "save": "儲存", + "saving": "儲存中..."
+ }, + "createDialog": { + "title": "建立用戶群組", + "description": "選擇一個已有用戶標籤註冊為用戶群組。" + }, + "editDialog": { + "title": "編輯用戶群組", + "description": "正在編輯用戶群組:{tag}" + }, + "deleteDialog": { + "title": "刪除用戶群組", + "description": "確定刪除用戶群組「{tag}」嗎?此操作不會修改任何用戶的標籤。", + "confirm": "刪除" + } + }, + "quotaBoosts": { + "errors": { + "list_failed": "取得臨時提額授予清單失敗。", + "create_failed": "建立臨時提額授予失敗。", + "delete_failed": "撤銷臨時提額授予失敗。", + "invalid_validity_range": "到期時間必須晚於開始時間。", + "action_failed": "臨時提額操作失敗。" + }, + "note": "提額在 validFrom 時刻啟動,在該使用者和模型組的基礎限額上累加疊加。變更在快取 TTL 內生效。", + "window": "時間視窗", + "window_5h": "5 小時", + "window_daily": "每日", + "window_weekly": "每週", + "window_monthly": "每月", + "window_total": "總計", + "amount": "提額金額", + "validFrom": "生效時間", + "validTo": "到期時間", + "note_label": "備註", + "notePlaceholder": "管理員備註(選填)", + "addBoost": "新增提額", + "noBoosts": "該使用者與模型組尚無提額組態", + "createSuccess": "提額已新增", + "deleteSuccess": "提額已撤銷", + "invalidAmount": "請輸入有效的正數金額", + "validityRequired": "請填寫生效時間和到期時間", + "revokeConfirm": { + "title": "撤銷臨時提額", + "description": "將永久撤銷該提額授予,效果在快取 TTL 內傳播。", + "cancel": "取消", + "confirm": "撤銷" + }, + "table": { + "window": "時間視窗", + "amount": "金額", + "validFrom": "生效時間", + "validTo": "到期時間", + "note": "備註", + "actions": "操作" + } + }, + "countedInGlobalLabel": "計入全局額", + "modelGroupOnlyLabel": "模型組單算", + "splitNote": "命中模型組限額的消費按軸完全切分:不計入該軸全局額度,僅在模型組預算內單獨計量。" } diff --git a/messages/zh-TW/settings/providers/strings.json b/messages/zh-TW/settings/providers/strings.json index 20d23a02f..595042217 100644 --- a/messages/zh-TW/settings/providers/strings.json +++ b/messages/zh-TW/settings/providers/strings.json @@ -48,6 +48,16 @@ "viewModeList": "清單", "viewModeVendor": "供應商", "viewModeGroups": "分組", + "viewModeModels": "模型", + "modelView": { + "search": "搜尋模型...", + "modelCount": "{count} 個模型", + "providerCount": "{count} 個供應商", + "unrestricted": "無限制", + "unrestrictedDesc": "這些供應商未設定具體模型限制,接受任意模型請求。", + "noModels": "未找到已配置具體模型的供應商。", +
"noResults": "未找到符合的模型。" + }, "endpoints": "服務端點", "manualProbe": "測速", "addEndpoint": "新增端點", diff --git a/scripts/audit-settings-placeholders.allowlist.json b/scripts/audit-settings-placeholders.allowlist.json index 3d18b1acf..b551d5fbc 100644 --- a/scripts/audit-settings-placeholders.allowlist.json +++ b/scripts/audit-settings-placeholders.allowlist.json @@ -1,5 +1,9 @@ { "entries": [ + { + "key": "myUsage.modelGroupQuota.modelsLabel", + "reason": "zh-cn==zh-tw common ui term" + }, { "key": "providers.form.codexInstructions", "reason": "zh-cn==zh-tw title term" diff --git a/src/actions/key-quota.ts b/src/actions/key-quota.ts index 4578bc30e..2777f5f36 100644 --- a/src/actions/key-quota.ts +++ b/src/actions/key-quota.ts @@ -22,6 +22,10 @@ export interface KeyQuotaItem { mode?: "fixed" | "rolling"; time?: string; resetAt?: Date; + // group-rate-limit (§5.3 / §10): for cost windows, the portion counted toward the + // mainline global gate vs the model-group-only split-off. Absent for limitSessions. 
+ countedInGlobalCurrent?: number; + modelGroupOnlyCurrent?: number; } export interface KeyQuotaUsageResult { @@ -100,7 +104,9 @@ export async function getKeyQuotaUsage(keyId: number): Promise ({ total: value, countedInGlobal: value }) + ) + : sumKeyCostSplitInTimeRange(keyId, clipStart(range5h.startTime), range5h.endTime), + sumKeyCostSplitInTimeRange( keyId, clipStart(keyDailyTimeRange.startTime), keyDailyTimeRange.endTime ), - sumKeyCostInTimeRange(keyId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), - sumKeyCostInTimeRange(keyId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), - sumKeyTotalCost(keyRow.key, Infinity, costResetAt), + sumKeyCostSplitInTimeRange(keyId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), + sumKeyCostSplitInTimeRange(keyId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), + sumKeyTotalCostSplit(keyRow.key, Infinity, costResetAt), SessionTracker.getKeySessionCount(keyId), ]); + const splitFields = (split: { total: number; countedInGlobal: number }) => { + const counted = Math.min(split.countedInGlobal, split.total); + return { + current: split.total, + countedInGlobalCurrent: counted, + modelGroupOnlyCurrent: Math.max(0, split.total - counted), + }; + }; + const items: KeyQuotaItem[] = [ { type: "limit5h", - current: cost5h, + ...splitFields(cost5h), limit: parseNumericLimit(keyRow.limit5hUsd), mode: limit5hResetMode, }, { type: "limitDaily", - current: costDaily, + ...splitFields(costDaily), limit: parseNumericLimit(keyRow.limitDailyUsd), mode: keyRow.dailyResetMode ?? "fixed", time: keyRow.dailyResetTime ?? 
"00:00", }, { type: "limitWeekly", - current: costWeekly, + ...splitFields(costWeekly), limit: parseNumericLimit(keyRow.limitWeeklyUsd), }, { type: "limitMonthly", - current: costMonthly, + ...splitFields(costMonthly), limit: parseNumericLimit(keyRow.limitMonthlyUsd), }, { type: "limitTotal", - current: totalCost, + ...splitFields(totalCost), limit: parseNumericLimit(keyRow.limitTotalUsd), resetAt: costResetAt ?? undefined, }, diff --git a/src/actions/keys.ts b/src/actions/keys.ts index a00882d42..0fae0f268 100644 --- a/src/actions/keys.ts +++ b/src/actions/keys.ts @@ -941,7 +941,9 @@ export async function getKeyLimitUsage(keyId: number): Promise< getTimeRangeForPeriodWithMode, } = await import("@/lib/rate-limit/time-utils"); const { RateLimitService } = await import("@/lib/rate-limit/service"); - const { sumKeyTotalCost, sumKeyCostInTimeRange } = await import("@/repository/statistics"); + const { sumKeyTotalCostSplit, sumKeyCostSplitInTimeRange } = await import( + "@/repository/statistics" + ); const effectiveConcurrentLimit = resolveKeyConcurrentSessionLimit( key.limitConcurrentSessions, result.userLimitConcurrentSessions ?? null @@ -966,22 +968,35 @@ export async function getKeyLimitUsage(keyId: number): Promise< const rangeMonthly = await getTimeRangeForPeriod("monthly"); // 获取金额消费(使用 DB direct,与 my-usage.ts 保持一致) + // group-rate-limit (§5.3/§10): split each cost window into counted-in-global vs + // model-group-only. 5h-fixed reads the runtime counter (global-counted post P0-1). const [cost5h, costDaily, costWeekly, costMonthly, totalCost, concurrentSessions] = await Promise.all([ limit5hResetMode === "fixed" - ? RateLimitService.getCurrentCost(keyId, "key", "5h", "00:00", limit5hResetMode) - : sumKeyCostInTimeRange(keyId, clipStart(range5h.startTime), range5h.endTime), - sumKeyCostInTimeRange( + ? 
RateLimitService.getCurrentCost(keyId, "key", "5h", "00:00", limit5hResetMode).then( + (value) => ({ total: value, countedInGlobal: value }) + ) + : sumKeyCostSplitInTimeRange(keyId, clipStart(range5h.startTime), range5h.endTime), + sumKeyCostSplitInTimeRange( keyId, clipStart(keyDailyTimeRange.startTime), keyDailyTimeRange.endTime ), - sumKeyCostInTimeRange(keyId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), - sumKeyCostInTimeRange(keyId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), - sumKeyTotalCost(key.key, Infinity, costResetAt), + sumKeyCostSplitInTimeRange(keyId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), + sumKeyCostSplitInTimeRange(keyId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), + sumKeyTotalCostSplit(key.key, Infinity, costResetAt), SessionTracker.getKeySessionCount(keyId), ]); + const splitFields = (split: { total: number; countedInGlobal: number }) => { + const counted = Math.min(split.countedInGlobal, split.total); + return { + current: split.total, + countedInGlobalCurrent: counted, + modelGroupOnlyCurrent: Math.max(0, split.total - counted), + }; + }; + // 获取重置时间 const resetAt5h = limit5hResetMode === "fixed" @@ -999,27 +1014,27 @@ export async function getKeyLimitUsage(keyId: number): Promise< ok: true, data: { cost5h: { - current: cost5h, + ...splitFields(cost5h), limit: key.limit5hUsd, resetAt: resetAt5h ?? undefined, }, costDaily: { - current: costDaily, + ...splitFields(costDaily), limit: key.limitDailyUsd, resetAt: resetInfoDaily.resetAt, }, costWeekly: { - current: costWeekly, + ...splitFields(costWeekly), limit: key.limitWeeklyUsd, resetAt: resetInfoWeekly.resetAt, }, costMonthly: { - current: costMonthly, + ...splitFields(costMonthly), limit: key.limitMonthlyUsd, resetAt: resetInfoMonthly.resetAt, }, costTotal: { - current: totalCost, + ...splitFields(totalCost), limit: key.limitTotalUsd ?? null, resetAt: costResetAt ?? 
undefined, }, diff --git a/src/actions/model-group.ts b/src/actions/model-group.ts new file mode 100644 index 000000000..49d35bd68 --- /dev/null +++ b/src/actions/model-group.ts @@ -0,0 +1,340 @@ +"use server"; + +import { getTranslations } from "next-intl/server"; +import { getSession } from "@/lib/auth"; +import { logger } from "@/lib/logger"; +import { publishModelLimitCacheInvalidation } from "@/lib/model-rate-limit/cache"; +import { ERROR_CODES } from "@/lib/utils/error-messages"; +import type { ModelGroupRow, ModelGroupWithMembers } from "@/repository/model-group"; +import { + findModelGroupIdByModel, + getModelGroup, + listModelGroupMembers, + ModelGroupMemberConflictError, + addModelGroupMember as repoAddModelGroupMember, + createModelGroup as repoCreateModelGroup, + createSingletonModelGroup as repoCreateSingletonModelGroup, + deleteModelGroup as repoDeleteModelGroup, + listModelGroups as repoListModelGroups, + removeModelGroupMember as repoRemoveModelGroupMember, + updateModelGroup as repoUpdateModelGroup, +} from "@/repository/model-group"; +import type { ActionResult } from "./types"; + +// --------------------------------------------------------------------------- +// Admin guard +// --------------------------------------------------------------------------- + +async function requireAdmin(): Promise< + { ok: true } | { ok: false; error: string; errorCode: string } +> { + const tError = await getTranslations("errors"); + const session = await getSession(); + if (!session || session.user.role !== "admin") { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + return { ok: true }; +} + +// --------------------------------------------------------------------------- +// List +// --------------------------------------------------------------------------- + +export async function listModelGroups(): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + try { + const data = await 
repoListModelGroups(); + return { ok: true, data }; + } catch (error) { + logger.error("listModelGroups failed", { error }); + const tError = await getTranslations("errors"); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +// --------------------------------------------------------------------------- +// Get single +// --------------------------------------------------------------------------- + +export async function getModelGroupById(id: number): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + try { + const tError = await getTranslations("errors"); + const data = await getModelGroup(id); + if (!data) return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + return { ok: true, data }; + } catch (error) { + logger.error("getModelGroupById failed", { id, error }); + const tError = await getTranslations("errors"); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +// --------------------------------------------------------------------------- +// Create +// --------------------------------------------------------------------------- + +export async function createModelGroup(input: { + name: string; + description?: string | null; + isSingleton?: boolean; +}): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const t = await getTranslations("quota.modelGroups"); + const tError = await getTranslations("errors"); + + try { + const name = input.name?.trim(); + if (!name) { + return { ok: false, error: t("nameRequired"), errorCode: "NAME_REQUIRED" }; + } + + const data = await repoCreateModelGroup({ + name, + description: input.description ?? null, + isSingleton: input.isSingleton ?? false, + }); + return { ok: true, data }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + if (msg.includes("unique") || msg.includes("duplicate") || msg.includes("model_groups_name")) { + return { ok: false, error: t("duplicateName"), errorCode: "DUPLICATE_NAME" }; + } + logger.error("createModelGroup failed", { input, error }); + return { ok: false, error: tError("CREATE_FAILED"), errorCode: ERROR_CODES.CREATE_FAILED }; + } +} + +// --------------------------------------------------------------------------- +// Update +// --------------------------------------------------------------------------- + +export async function updateModelGroup( + id: number, + input: Partial<{ name: string; description: string | null; isSingleton: boolean }> +): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const t = await getTranslations("quota.modelGroups"); + const tError = await getTranslations("errors"); + + try { + if (input.name !== undefined && !input.name.trim()) { + return { ok: false, error: t("nameRequired"), errorCode: "NAME_REQUIRED" }; + } + + const data = await repoUpdateModelGroup(id, input); + return { ok: true, data }; + } catch (error) { + const msg = error instanceof Error ? 
error.message : String(error); + if (msg.includes("not found")) { + return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + } + if (msg.includes("unique") || msg.includes("model_groups_name")) { + return { ok: false, error: t("duplicateName"), errorCode: "DUPLICATE_NAME" }; + } + logger.error("updateModelGroup failed", { id, input, error }); + return { ok: false, error: tError("UPDATE_FAILED"), errorCode: ERROR_CODES.UPDATE_FAILED }; + } +} + +// --------------------------------------------------------------------------- +// Delete +// --------------------------------------------------------------------------- + +export async function deleteModelGroup(id: number): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const tError = await getTranslations("errors"); + + try { + const existing = await getModelGroup(id); + if (!existing) { + return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + } + + await repoDeleteModelGroup(id); + // Group removal (cascades members) changes the resolution snapshot's + // modelToGroupId / groupMembers; refresh + broadcast so the change applies + // immediately instead of after the 30s snapshot TTL. 
+ await publishModelLimitCacheInvalidation(); + return { ok: true, data: undefined }; + } catch (error) { + logger.error("deleteModelGroup failed", { id, error }); + return { ok: false, error: tError("DELETE_FAILED"), errorCode: ERROR_CODES.DELETE_FAILED }; + } +} + +// --------------------------------------------------------------------------- +// Members +// --------------------------------------------------------------------------- + +export async function addModelGroupMember( + groupId: number, + model: string +): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const t = await getTranslations("quota.modelGroups"); + const tError = await getTranslations("errors"); + + try { + const trimmedModel = model?.trim(); + if (!trimmedModel) { + return { ok: false, error: t("modelRequired"), errorCode: "MODEL_REQUIRED" }; + } + + await repoAddModelGroupMember(groupId, trimmedModel); + // Membership changes the snapshot's modelToGroupId / groupMembers; refresh + + // broadcast so the new per-model quota is enforced immediately (not after TTL). 
+ await publishModelLimitCacheInvalidation(); + return { ok: true, data: undefined }; + } catch (error) { + if (error instanceof ModelGroupMemberConflictError) { + return { + ok: false, + error: t("memberConflict", { + model, + groupName: error.conflictGroupName, + groupId: error.conflictGroupId, + }), + errorCode: "MEMBER_CONFLICT", + errorParams: { + model, + groupName: error.conflictGroupName, + groupId: error.conflictGroupId, + }, + }; + } + logger.error("addModelGroupMember failed", { groupId, model, error }); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +export async function removeModelGroupMember( + groupId: number, + model: string +): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const tError = await getTranslations("errors"); + + try { + await repoRemoveModelGroupMember(groupId, model); + // Membership changes the snapshot's modelToGroupId / groupMembers; refresh + + // broadcast so the model reverts to ungrouped immediately (not after TTL). 
+ await publishModelLimitCacheInvalidation(); + return { ok: true, data: undefined }; + } catch (error) { + logger.error("removeModelGroupMember failed", { groupId, model, error }); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +export async function getModelGroupMembers(groupId: number): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + try { + const data = await listModelGroupMembers(groupId); + return { ok: true, data }; + } catch (error) { + logger.error("getModelGroupMembers failed", { groupId, error }); + const tError = await getTranslations("errors"); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +export async function lookupModelGroupByModel(model: string): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + try { + const data = await findModelGroupIdByModel(model); + return { ok: true, data }; + } catch (error) { + logger.error("lookupModelGroupByModel failed", { model, error }); + const tError = await getTranslations("errors"); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +// --------------------------------------------------------------------------- +// Singleton convenience +// --------------------------------------------------------------------------- + +export async function createSingletonModelGroup( + model: string, + name?: string +): Promise> { + const auth = await requireAdmin(); + if (!auth.ok) return auth; + + const t = await getTranslations("quota.modelGroups"); + const tError = await getTranslations("errors"); + + try { + const trimmedModel = model?.trim(); + if (!trimmedModel) { + return { ok: false, error: t("modelRequired"), errorCode: "MODEL_REQUIRED" }; + } + + const data = await repoCreateSingletonModelGroup(trimmedModel, name?.trim()); + // A singleton group 
registers a new model->group mapping; refresh + broadcast + // so the snapshot picks it up immediately instead of after the TTL. + await publishModelLimitCacheInvalidation(); + return { ok: true, data }; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + if (msg.includes("unique") || msg.includes("model_groups_name")) { + return { ok: false, error: t("duplicateName"), errorCode: "DUPLICATE_NAME" }; + } + if (error instanceof ModelGroupMemberConflictError) { + return { + ok: false, + error: t("memberConflict", { + model, + groupName: error.conflictGroupName, + groupId: error.conflictGroupId, + }), + errorCode: "MEMBER_CONFLICT", + }; + } + logger.error("createSingletonModelGroup failed", { model, name, error }); + return { ok: false, error: tError("CREATE_FAILED"), errorCode: ERROR_CODES.CREATE_FAILED }; + } +} diff --git a/src/actions/model-limit.ts b/src/actions/model-limit.ts new file mode 100644 index 000000000..e0db17516 --- /dev/null +++ b/src/actions/model-limit.ts @@ -0,0 +1,84 @@ +"use server"; + +import { revalidatePath } from "next/cache"; +import { getSession } from "@/lib/auth"; +import { logger } from "@/lib/logger"; +import { publishModelLimitCacheInvalidation } from "@/lib/model-rate-limit/cache"; +import { ERROR_CODES } from "@/lib/utils/error-messages"; +import { + deleteModelGroupLimit, + type LimitSubjectType, + listModelGroupLimits, + type ModelGroupLimitInput, + type ModelGroupLimitRecord, + upsertModelGroupLimit, +} from "@/repository/model-group-limit"; +import type { ActionResult } from "./types"; + +const SETTINGS_PATH = "/dashboard/quotas/model-limits"; + +function isAdmin(session: Awaited>): boolean { + return !!session && session.user.role === "admin"; +} + +export async function listModelGroupLimitsAction(filter: { + subjectType?: LimitSubjectType; + subjectId?: number; + modelGroupId?: number; +}): Promise> { + const session = await getSession(); + if (!isAdmin(session)) { + return { ok: false, 
error: "权限不足", errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + try { + return { ok: true, data: await listModelGroupLimits(filter) }; + } catch (error) { + logger.error("[ModelLimitAction] Failed to list model group limits", { error, filter }); + return { ok: false, error: "获取按模型限额失败", errorCode: ERROR_CODES.OPERATION_FAILED }; + } +} + +export async function upsertModelGroupLimitAction( + subjectType: LimitSubjectType, + subjectId: number, + modelGroupId: number, + input: ModelGroupLimitInput +): Promise> { + const session = await getSession(); + if (!isAdmin(session)) { + return { ok: false, error: "权限不足", errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + try { + const data = await upsertModelGroupLimit(subjectType, subjectId, modelGroupId, input); + await publishModelLimitCacheInvalidation(); + revalidatePath(SETTINGS_PATH); + return { ok: true, data }; + } catch (error) { + logger.error("[ModelLimitAction] Failed to upsert model group limit", { + error, + subjectType, + subjectId, + modelGroupId, + }); + return { ok: false, error: "保存按模型限额失败", errorCode: ERROR_CODES.OPERATION_FAILED }; + } +} + +export async function deleteModelGroupLimitAction(id: number): Promise { + const session = await getSession(); + if (!isAdmin(session)) { + return { ok: false, error: "权限不足", errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + try { + await deleteModelGroupLimit(id); + await publishModelLimitCacheInvalidation(); + revalidatePath(SETTINGS_PATH); + return { ok: true }; + } catch (error) { + logger.error("[ModelLimitAction] Failed to delete model group limit", { error, id }); + return { ok: false, error: "删除按模型限额失败", errorCode: ERROR_CODES.DELETE_FAILED }; + } +} diff --git a/src/actions/my-usage.ts b/src/actions/my-usage.ts index a1536f2dc..752c8bffa 100644 --- a/src/actions/my-usage.ts +++ b/src/actions/my-usage.ts @@ -8,6 +8,7 @@ import { messageRequest, usageLedger } from "@/drizzle/schema"; import { getSession } from "@/lib/auth"; import { lookupIp } from 
"@/lib/ip-geo/client"; import { logger } from "@/lib/logger"; +import type { LimitWindow, ModelLimitBucket } from "@/lib/model-rate-limit/types"; import { resolveKeyConcurrentSessionLimit } from "@/lib/rate-limit/concurrent-session-limit"; import { clipStartByResetAt, @@ -178,11 +179,19 @@ export interface MyUsageQuota { keyLimitMonthlyUsd: number | null; keyLimitTotalUsd: number | null; keyLimitConcurrentSessions: number; + // group-rate-limit (§5.3 / §10): keyCurrent*Usd is the portion counted toward the + // mainline global gate (what the limit enforces); keyCurrent*ModelGroupOnlyUsd is the + // split-off model-group spend. Their sum is the total spend in that window. keyCurrent5hUsd: number; keyCurrentDailyUsd: number; keyCurrentWeeklyUsd: number; keyCurrentMonthlyUsd: number; keyCurrentTotalUsd: number; + keyCurrent5hModelGroupOnlyUsd: number; + keyCurrentDailyModelGroupOnlyUsd: number; + keyCurrentWeeklyModelGroupOnlyUsd: number; + keyCurrentMonthlyModelGroupOnlyUsd: number; + keyCurrentTotalModelGroupOnlyUsd: number; keyCurrentConcurrentSessions: number; userLimit5hUsd: number | null; @@ -196,6 +205,11 @@ export interface MyUsageQuota { userCurrentWeeklyUsd: number; userCurrentMonthlyUsd: number; userCurrentTotalUsd: number; + userCurrent5hModelGroupOnlyUsd: number; + userCurrentDailyModelGroupOnlyUsd: number; + userCurrentWeeklyModelGroupOnlyUsd: number; + userCurrentMonthlyModelGroupOnlyUsd: number; + userCurrentTotalModelGroupOnlyUsd: number; userCurrentConcurrentSessions: number; userLimitDailyUsd: number | null; @@ -216,6 +230,31 @@ export interface MyUsageQuota { dailyResetTime: string; } +/** One enforced cost window of a model-group quota view (only configured caps). */ +export interface MyModelGroupQuotaWindow { + window: LimitWindow; + current: number; + limit: number; +} + +/** Per-axis (key / user) windows for one model group on the my-usage page. 
*/ +export interface MyModelGroupQuotaAxis { + axis: "key" | "user"; + windows: MyModelGroupQuotaWindow[]; +} + +/** + * A self-service model-group quota view. The page renders one card per entry + * when per-model rate limiting applies to the caller (see docs/limit §10). + */ +export interface MyModelGroupQuota { + modelGroupId: number; + modelGroupName: string; + models: string[]; + axes: MyModelGroupQuotaAxis[]; + currencyCode: CurrencyCode; +} + export interface MyTodayStats { calls: number; inputTokens: number; @@ -459,22 +498,30 @@ export async function getMyQuota(): Promise> { getUserConcurrentSessions(user.id), ]); - const { - cost5h: keyCurrent5hUsd, - costDaily: keyCostDaily, - costWeekly: keyCostWeekly, - costMonthly: keyCostMonthly, - costTotal: keyTotalCost, - } = keyCosts; - const { - cost5h: userCurrent5hUsd, - costDaily: userCostDaily, - costWeekly: userCostWeekly, - costMonthly: userCostMonthly, - costTotal: userTotalCost, - } = userCosts; - const resolvedKeyCurrent5hUsd = keyFixed5hUsd ?? keyCurrent5hUsd; - const resolvedUserCurrent5hUsd = userFixed5hUsd ?? userCurrent5hUsd; + // group-rate-limit (§5.3/§10): the gauge "used" shows the global-counted portion; the + // model-group-only split is `total - counted`. For 5h-fixed the runtime counter is + // already global-counted (post P0-1), so its model-group-only portion is 0. + const modelGroupOnly = (total: number, counted: number): number => + Math.max(0, total - Math.min(counted, total)); + + const keyCurrent5hUsd = keyFixed5hUsd ?? keyCosts.cost5hCounted; + const keyCurrentDailyUsd = keyCosts.costDailyCounted; + const keyCurrentWeeklyUsd = keyCosts.costWeeklyCounted; + const keyCurrentMonthlyUsd = keyCosts.costMonthlyCounted; + const keyTotalCost = keyCosts.costTotalCounted; + const keyCurrent5hModelGroupOnlyUsd = + keyFixed5hUsd != null ? 0 : modelGroupOnly(keyCosts.cost5h, keyCosts.cost5hCounted); + + const userCurrent5hUsd = userFixed5hUsd ?? 
userCosts.cost5hCounted; + const userCurrentDailyUsd = userCosts.costDailyCounted; + const userCurrentWeeklyUsd = userCosts.costWeeklyCounted; + const userCurrentMonthlyUsd = userCosts.costMonthlyCounted; + const userTotalCost = userCosts.costTotalCounted; + const userCurrent5hModelGroupOnlyUsd = + userFixed5hUsd != null ? 0 : modelGroupOnly(userCosts.cost5h, userCosts.cost5hCounted); + + const resolvedKeyCurrent5hUsd = keyCurrent5hUsd; + const resolvedUserCurrent5hUsd = userCurrent5hUsd; const quota: MyUsageQuota = { keyLimit5hUsd: key.limit5hUsd ?? null, @@ -484,10 +531,27 @@ export async function getMyQuota(): Promise> { keyLimitTotalUsd: key.limitTotalUsd ?? null, keyLimitConcurrentSessions: effectiveKeyConcurrentLimit, keyCurrent5hUsd: resolvedKeyCurrent5hUsd, - keyCurrentDailyUsd: keyCostDaily, - keyCurrentWeeklyUsd: keyCostWeekly, - keyCurrentMonthlyUsd: keyCostMonthly, + keyCurrentDailyUsd: keyCurrentDailyUsd, + keyCurrentWeeklyUsd: keyCurrentWeeklyUsd, + keyCurrentMonthlyUsd: keyCurrentMonthlyUsd, keyCurrentTotalUsd: keyTotalCost, + keyCurrent5hModelGroupOnlyUsd: keyCurrent5hModelGroupOnlyUsd, + keyCurrentDailyModelGroupOnlyUsd: modelGroupOnly( + keyCosts.costDaily, + keyCosts.costDailyCounted + ), + keyCurrentWeeklyModelGroupOnlyUsd: modelGroupOnly( + keyCosts.costWeekly, + keyCosts.costWeeklyCounted + ), + keyCurrentMonthlyModelGroupOnlyUsd: modelGroupOnly( + keyCosts.costMonthly, + keyCosts.costMonthlyCounted + ), + keyCurrentTotalModelGroupOnlyUsd: modelGroupOnly( + keyCosts.costTotal, + keyCosts.costTotalCounted + ), keyCurrentConcurrentSessions: keyConcurrent, userLimit5hUsd: user.limit5hUsd ?? null, @@ -497,10 +561,27 @@ export async function getMyQuota(): Promise> { userLimitConcurrentSessions: user.limitConcurrentSessions ?? null, userRpmLimit: user.rpm ?? 
null, userCurrent5hUsd: resolvedUserCurrent5hUsd, - userCurrentDailyUsd: userCostDaily, - userCurrentWeeklyUsd: userCostWeekly, - userCurrentMonthlyUsd: userCostMonthly, + userCurrentDailyUsd: userCurrentDailyUsd, + userCurrentWeeklyUsd: userCurrentWeeklyUsd, + userCurrentMonthlyUsd: userCurrentMonthlyUsd, userCurrentTotalUsd: userTotalCost, + userCurrent5hModelGroupOnlyUsd: userCurrent5hModelGroupOnlyUsd, + userCurrentDailyModelGroupOnlyUsd: modelGroupOnly( + userCosts.costDaily, + userCosts.costDailyCounted + ), + userCurrentWeeklyModelGroupOnlyUsd: modelGroupOnly( + userCosts.costWeekly, + userCosts.costWeeklyCounted + ), + userCurrentMonthlyModelGroupOnlyUsd: modelGroupOnly( + userCosts.costMonthly, + userCosts.costMonthlyCounted + ), + userCurrentTotalModelGroupOnlyUsd: modelGroupOnly( + userCosts.costTotal, + userCosts.costTotalCounted + ), userCurrentConcurrentSessions: userKeyConcurrent, userLimitDailyUsd: user.dailyQuota ?? null, @@ -528,6 +609,117 @@ export async function getMyQuota(): Promise> { } } +const MODEL_GROUP_QUOTA_WINDOWS: ReadonlyArray<{ + window: LimitWindow; + limitField: keyof ModelLimitBucket["caps"]; + costField: "cost5h" | "costDaily" | "costWeekly" | "costMonthly" | "costTotal"; +}> = [ + { window: "5h", limitField: "limit5hUsd", costField: "cost5h" }, + { window: "daily", limitField: "dailyLimitUsd", costField: "costDaily" }, + { window: "weekly", limitField: "limitWeeklyUsd", costField: "costWeekly" }, + { window: "monthly", limitField: "limitMonthlyUsd", costField: "costMonthly" }, + { window: "total", limitField: "limitTotalUsd", costField: "costTotal" }, +]; + +/** + * Per-model-group quota views for the self-service usage page. Returns one entry + * per model group the caller (user / key / its user groups) has a configured + * limit on, with current usage per enforced window. Empty when the per-model + * rate-limit feature is off or the caller has no model-group limits (the page + * then shows only the mainline quota card). 
+ */ +export async function getMyModelGroupQuotas(): Promise> { + try { + const session = await getSession({ allowReadOnlyAccess: true }); + if (!session) return { ok: false, error: "Unauthorized" }; + + const { isModelRateLimitEnabled } = await import("@/lib/model-rate-limit/types"); + if (!isModelRateLimitEnabled()) return { ok: true, data: [] }; + + const { getModelLimitSnapshot } = await import("@/lib/model-rate-limit/cache"); + const { resolveAllSubjectModelLimits } = await import("@/lib/model-rate-limit/resolver"); + const { listModelGroups } = await import("@/repository/model-group"); + const { sumScopeQuotaCostsByModels } = await import("@/repository/statistics"); + const { getLeaseTimeRange } = await import("@/lib/rate-limit/lease"); + + const user = session.user; + const key = session.key; + const now = new Date(); + + const snapshot = await getModelLimitSnapshot(); + const buckets = resolveAllSubjectModelLimits(snapshot, { + userId: user.id, + keyId: key.id, + tags: user.tags ?? [], + now, + }); + if (buckets.length === 0) return { ok: true, data: [] }; + + const settings = await getSystemSettings(); + const currencyCode = settings.currencyDisplay; + + const groups = await listModelGroups(); + const groupNameById = new Map(groups.map((g) => [g.id, g.name])); + + // daily/weekly/monthly windows are shared across buckets (fixed mode); 5h + // follows each bucket's reset mode/costResetAt, so it is computed per bucket. + const [rangeDaily, rangeWeekly, rangeMonthly] = await Promise.all([ + getLeaseTimeRange("daily", "00:00", "fixed"), + getLeaseTimeRange("weekly", "00:00", "fixed"), + getLeaseTimeRange("monthly", "00:00", "fixed"), + ]); + + const axesByGroup = new Map(); + + await Promise.all( + buckets.map(async (bucket) => { + const { caps } = bucket; + const range5h = await getLeaseTimeRange("5h", "00:00", caps.limit5hResetMode); + const startTime5h = + caps.limit5hCostResetAt instanceof Date && caps.limit5hCostResetAt > range5h.startTime + ? 
caps.limit5hCostResetAt + : range5h.startTime; + + const costs = await sumScopeQuotaCostsByModels(bucket.axis, bucket.scopeId, bucket.models, { + range5h: { startTime: startTime5h, endTime: range5h.endTime }, + rangeDaily, + rangeWeekly, + rangeMonthly, + }); + + const windows: MyModelGroupQuotaWindow[] = MODEL_GROUP_QUOTA_WINDOWS.flatMap((def) => { + const limit = caps[def.limitField]; + if (typeof limit !== "number" || limit <= 0) return []; + return [{ window: def.window, current: costs[def.costField], limit }]; + }); + if (windows.length === 0) return; + + const entry = axesByGroup.get(bucket.modelGroupId) ?? { + models: bucket.models, + axes: [], + }; + entry.axes.push({ axis: bucket.axis, windows }); + axesByGroup.set(bucket.modelGroupId, entry); + }) + ); + + const data: MyModelGroupQuota[] = [...axesByGroup.entries()] + .map(([modelGroupId, { models, axes }]) => ({ + modelGroupId, + modelGroupName: groupNameById.get(modelGroupId) ?? `#${modelGroupId}`, + models, + axes: axes.sort((a, b) => (a.axis === b.axis ? 0 : a.axis === "key" ? 
-1 : 1)), + currencyCode, + })) + .sort((a, b) => a.modelGroupName.localeCompare(b.modelGroupName)); + + return { ok: true, data }; + } catch (error) { + logger.error("[my-usage] getMyModelGroupQuotas failed", error); + return { ok: false, error: "Failed to get model group quotas" }; + } +} + export async function getMyTodayStats(): Promise> { try { const session = await getSession({ allowReadOnlyAccess: true }); diff --git a/src/actions/quota-boost.ts b/src/actions/quota-boost.ts new file mode 100644 index 000000000..bb951acd6 --- /dev/null +++ b/src/actions/quota-boost.ts @@ -0,0 +1,119 @@ +"use server"; + +import { getSession } from "@/lib/auth"; +import { logger } from "@/lib/logger"; +import { publishModelLimitCacheInvalidation } from "@/lib/model-rate-limit/cache"; +import * as repo from "@/repository/quota-boost"; +import type { ActionResult } from "./types"; + +export async function listQuotaBoostGrantsAction(filter: { + userId?: number; + modelGroupId?: number; +}): Promise> { + try { + const session = await getSession(); + if (!session || session.user.role !== "admin") { + return { ok: false, error: "permission denied", errorCode: "auth.forbidden" }; + } + + const items = await repo.listQuotaBoostGrants(filter); + return { ok: true, data: items }; + } catch (error) { + logger.error("[QuotaBoostAction] Failed to list grants", { error }); + return { + ok: false, + error: "Failed to list quota boost grants.", + errorCode: "quota_boost.list_failed", + }; + } +} + +export async function createQuotaBoostGrantAction(input: { + userId: number; + modelGroupId: number; + window: repo.BoostWindow; + amountUsd: number; + validFrom: string; + validTo: string; + note?: string | null; +}): Promise> { + try { + const session = await getSession(); + if (!session || session.user.role !== "admin") { + return { ok: false, error: "permission denied", errorCode: "auth.forbidden" }; + } + + const validFrom = new Date(input.validFrom); + const validTo = new Date(input.validTo); 
+ + if (validTo <= validFrom) { + return { + ok: false, + error: "validTo must be after validFrom.", + errorCode: "quota_boost.invalid_validity_range", + }; + } + + const row = await repo.createQuotaBoostGrant({ + userId: input.userId, + modelGroupId: input.modelGroupId, + window: input.window, + amountUsd: input.amountUsd, + validFrom, + validTo, + note: input.note ?? null, + createdBy: session.user.id, + }); + + // A boost grant feeds buildModelLimitSnapshot(); refresh + broadcast so an + // immediately-active grant takes effect without waiting for the snapshot TTL. + await publishModelLimitCacheInvalidation(); + + logger.info("[QuotaBoostAction] Created quota boost grant", { + grantId: row.id, + userId: input.userId, + modelGroupId: input.modelGroupId, + window: input.window, + amountUsd: input.amountUsd, + adminId: session.user.id, + }); + + return { ok: true, data: row }; + } catch (error) { + logger.error("[QuotaBoostAction] Failed to create grant", { error }); + return { + ok: false, + error: "Failed to create quota boost grant.", + errorCode: "quota_boost.create_failed", + }; + } +} + +export async function deleteQuotaBoostGrantAction(id: number): Promise { + try { + const session = await getSession(); + if (!session || session.user.role !== "admin") { + return { ok: false, error: "permission denied", errorCode: "auth.forbidden" }; + } + + await repo.deleteQuotaBoostGrant(id); + + // Revoking a grant changes the snapshot; refresh + broadcast so the boost + // stops applying immediately instead of after the snapshot TTL. 
+ await publishModelLimitCacheInvalidation(); + + logger.info("[QuotaBoostAction] Revoked quota boost grant", { + grantId: id, + adminId: session.user.id, + }); + + return { ok: true }; + } catch (error) { + logger.error("[QuotaBoostAction] Failed to delete grant", { error }); + return { + ok: false, + error: "Failed to revoke quota boost grant.", + errorCode: "quota_boost.delete_failed", + }; + } +} diff --git a/src/actions/system-config.ts b/src/actions/system-config.ts index eb4a24636..28f095a6d 100644 --- a/src/actions/system-config.ts +++ b/src/actions/system-config.ts @@ -88,6 +88,11 @@ export async function saveSystemSettings(formData: { quotaLeasePercentWeekly?: number; quotaLeasePercentMonthly?: number; quotaLeaseCapUsd?: number | null; + quotaModelLeasePercent5h?: number | null; + quotaModelLeasePercentDaily?: number | null; + quotaModelLeasePercentWeekly?: number | null; + quotaModelLeasePercentMonthly?: number | null; + quotaModelLeaseMinSliceUsd?: number | null; publicStatusWindowHours?: number; publicStatusAggregationIntervalMinutes?: number; // IP 提取 / 归属地查询 @@ -140,6 +145,11 @@ export async function saveSystemSettings(formData: { quotaLeasePercentWeekly: validated.quotaLeasePercentWeekly, quotaLeasePercentMonthly: validated.quotaLeasePercentMonthly, quotaLeaseCapUsd: validated.quotaLeaseCapUsd, + quotaModelLeasePercent5h: validated.quotaModelLeasePercent5h, + quotaModelLeasePercentDaily: validated.quotaModelLeasePercentDaily, + quotaModelLeasePercentWeekly: validated.quotaModelLeasePercentWeekly, + quotaModelLeasePercentMonthly: validated.quotaModelLeasePercentMonthly, + quotaModelLeaseMinSliceUsd: validated.quotaModelLeaseMinSliceUsd, publicStatusWindowHours: validated.publicStatusWindowHours, publicStatusAggregationIntervalMinutes: validated.publicStatusAggregationIntervalMinutes, ipExtractionConfig: validated.ipExtractionConfig, diff --git a/src/actions/user-group.ts b/src/actions/user-group.ts new file mode 100644 index 000000000..cd89075fc --- 
/dev/null +++ b/src/actions/user-group.ts @@ -0,0 +1,167 @@ +"use server"; + +import { getTranslations } from "next-intl/server"; +import { getSession } from "@/lib/auth"; +import { logger } from "@/lib/logger"; +import { ERROR_CODES } from "@/lib/utils/error-messages"; +import type { UserGroupRow } from "@/repository/user-group"; +import { + countUsersByTags, + countUsersInUserGroup, + getUserGroupByTag, + createUserGroup as repoCreateUserGroup, + deleteUserGroup as repoDeleteUserGroup, + getUserGroup as repoGetUserGroup, + listUserGroups as repoListUserGroups, + updateUserGroup as repoUpdateUserGroup, +} from "@/repository/user-group"; +import type { ActionResult } from "./types"; + +export type UserGroupWithCount = UserGroupRow & { memberCount: number }; + +export type UserGroupCreateInput = { + tag: string; + name?: string | null; + description?: string | null; +}; + +export type UserGroupUpdateInput = { + name?: string | null; + description?: string | null; +}; + +async function requireAdmin() { + const session = await getSession(); + return session?.user.role === "admin" ? session : null; +} + +export async function listUserGroups(): Promise> { + const tError = await getTranslations("errors"); + try { + const session = await requireAdmin(); + if (!session) { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + const groups = await repoListUserGroups(); + const counts = await countUsersByTags(groups.map((g) => g.tag)); + const data: UserGroupWithCount[] = groups.map((g) => ({ + ...g, + memberCount: counts.get(g.tag) ?? 
0, + })); + return { ok: true, data }; + } catch (error) { + logger.error("Failed to list user groups:", error); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +export async function getUserGroup(id: number): Promise> { + const tError = await getTranslations("errors"); + try { + const session = await requireAdmin(); + if (!session) { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + const group = await repoGetUserGroup(id); + if (!group) { + return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + } + + const memberCount = await countUsersInUserGroup(group.tag); + return { ok: true, data: { ...group, memberCount } }; + } catch (error) { + logger.error("Failed to get user group:", error); + return { + ok: false, + error: tError("OPERATION_FAILED"), + errorCode: ERROR_CODES.OPERATION_FAILED, + }; + } +} + +export async function createUserGroup( + input: UserGroupCreateInput +): Promise> { + const t = await getTranslations("quota.userGroups"); + const tError = await getTranslations("errors"); + try { + const session = await requireAdmin(); + if (!session) { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + const tag = input.tag?.trim(); + if (!tag) { + return { ok: false, error: t("tagRequired"), errorCode: "TAG_REQUIRED" }; + } + + const existing = await getUserGroupByTag(tag); + if (existing) { + return { ok: false, error: t("duplicateTag"), errorCode: "DUPLICATE_TAG" }; + } + + const group = await repoCreateUserGroup({ + tag, + name: input.name, + description: input.description, + }); + return { ok: true, data: group }; + } catch (error) { + logger.error("Failed to create user group:", error); + return { ok: false, error: tError("CREATE_FAILED"), errorCode: ERROR_CODES.CREATE_FAILED }; + } +} + +export async function updateUserGroup( + id: number, + input: 
UserGroupUpdateInput +): Promise> { + const tError = await getTranslations("errors"); + try { + const session = await requireAdmin(); + if (!session) { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + const existing = await repoGetUserGroup(id); + if (!existing) { + return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + } + + const updated = await repoUpdateUserGroup(id, { + name: input.name, + description: input.description, + }); + return { ok: true, data: updated }; + } catch (error) { + logger.error("Failed to update user group:", error); + return { ok: false, error: tError("UPDATE_FAILED"), errorCode: ERROR_CODES.UPDATE_FAILED }; + } +} + +export async function deleteUserGroup(id: number): Promise> { + const tError = await getTranslations("errors"); + try { + const session = await requireAdmin(); + if (!session) { + return { ok: false, error: tError("UNAUTHORIZED"), errorCode: ERROR_CODES.UNAUTHORIZED }; + } + + const existing = await repoGetUserGroup(id); + if (!existing) { + return { ok: false, error: tError("NOT_FOUND"), errorCode: ERROR_CODES.NOT_FOUND }; + } + + await repoDeleteUserGroup(id); + return { ok: true, data: undefined }; + } catch (error) { + logger.error("Failed to delete user group:", error); + return { ok: false, error: tError("DELETE_FAILED"), errorCode: ERROR_CODES.DELETE_FAILED }; + } +} diff --git a/src/actions/users.ts b/src/actions/users.ts index 1c543c155..02fe8a14e 100644 --- a/src/actions/users.ts +++ b/src/actions/users.ts @@ -2171,7 +2171,10 @@ export async function getUserAllLimitUsage(userId: number): Promise< "@/lib/rate-limit/time-utils" ); const { RateLimitService } = await import("@/lib/rate-limit/service"); - const { sumUserCostInTimeRange, sumUserTotalCost } = await import("@/repository/statistics"); + const { sumUserCostSplitInTimeRange, sumUserTotalCostSplit } = await import( + "@/repository/statistics" + ); + const { buildSplitWindow } = 
await import("@/lib/quota/limit-usage-split"); const limit5hResetMode = user.limit5hResetMode ?? "rolling"; const user5hCostResetAt = resolveUser5hCostResetAt( user.costResetAt ?? null, @@ -2192,26 +2195,31 @@ export async function getUserAllLimitUsage(userId: number): Promise< const clipStart = (start: Date): Date => clipStartByResetAt(start, user.costResetAt ?? null); const clip5hStart = (start: Date): Date => clipStartByResetAt(start, user5hCostResetAt); - // 并行查询各时间范围的消费 - // Note: sumUserTotalCost uses ALL_TIME_MAX_AGE_DAYS for all-time semantics + // 并行查询各时间范围的消费(group-rate-limit §5.3/§10:拆「计入全局 / 模型组单算」) + // Note: sumUserTotalCostSplit uses ALL_TIME_MAX_AGE_DAYS for all-time semantics const [usage5h, usageDaily, usageWeekly, usageMonthly, usageTotal] = await Promise.all([ limit5hResetMode === "fixed" - ? RateLimitService.getCurrentCost(userId, "user", "5h", "00:00", limit5hResetMode) - : sumUserCostInTimeRange(userId, clip5hStart(range5h.startTime), range5h.endTime), - sumUserCostInTimeRange(userId, clipStart(rangeDaily.startTime), rangeDaily.endTime), - sumUserCostInTimeRange(userId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), - sumUserCostInTimeRange(userId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), - sumUserTotalCost(userId, ALL_TIME_MAX_AGE_DAYS, user.costResetAt), + ? RateLimitService.getCurrentCost(userId, "user", "5h", "00:00", limit5hResetMode).then( + // group-rate-limit (§5.3): after the 5h-fixed counter excludes split spend, + // getCurrentCost already returns the global-counted value; the fixed window does + // not expose a model-group-only breakdown (non-default mode), so total == counted. 
+ (value) => ({ total: value, countedInGlobal: value }) + ) + : sumUserCostSplitInTimeRange(userId, clip5hStart(range5h.startTime), range5h.endTime), + sumUserCostSplitInTimeRange(userId, clipStart(rangeDaily.startTime), rangeDaily.endTime), + sumUserCostSplitInTimeRange(userId, clipStart(rangeWeekly.startTime), rangeWeekly.endTime), + sumUserCostSplitInTimeRange(userId, clipStart(rangeMonthly.startTime), rangeMonthly.endTime), + sumUserTotalCostSplit(userId, ALL_TIME_MAX_AGE_DAYS, user.costResetAt), ]); return { ok: true, data: { - limit5h: { usage: usage5h, limit: user.limit5hUsd ?? null }, - limitDaily: { usage: usageDaily, limit: user.dailyQuota ?? null }, - limitWeekly: { usage: usageWeekly, limit: user.limitWeeklyUsd ?? null }, - limitMonthly: { usage: usageMonthly, limit: user.limitMonthlyUsd ?? null }, - limitTotal: { usage: usageTotal, limit: user.limitTotalUsd ?? null }, + limit5h: buildSplitWindow(usage5h, user.limit5hUsd ?? null), + limitDaily: buildSplitWindow(usageDaily, user.dailyQuota ?? null), + limitWeekly: buildSplitWindow(usageWeekly, user.limitWeeklyUsd ?? null), + limitMonthly: buildSplitWindow(usageMonthly, user.limitMonthlyUsd ?? null), + limitTotal: buildSplitWindow(usageTotal, user.limitTotalUsd ?? 
null), }, }; } catch (error) { diff --git a/src/app/[locale]/dashboard/_components/user/key-quota-usage-dialog.tsx b/src/app/[locale]/dashboard/_components/user/key-quota-usage-dialog.tsx index 1731ea44a..4c86f717a 100644 --- a/src/app/[locale]/dashboard/_components/user/key-quota-usage-dialog.tsx +++ b/src/app/[locale]/dashboard/_components/user/key-quota-usage-dialog.tsx @@ -57,6 +57,7 @@ export function KeyQuotaUsageDialog({ }: KeyQuotaUsageDialogProps) { const t = useTranslations("dashboard.userManagement.keyQuotaUsageDialog"); const tEdit = useTranslations("quota.quickEdit"); + const tQuota = useTranslations("quota"); const [loading, setLoading] = useState(false); const [data, setData] = useState(null); const [error, setError] = useState(false); @@ -183,41 +184,65 @@ export function KeyQuotaUsageDialog({
{t("fetchFailed")}
) : (
- {sortedItems?.map((item) => ( -
-
- {t(getLabelKey(item.type))} - - {formatValue(item.type, item.current)} /{" "} - handleSaveLimit(item.type, newLimit)} - allowClear={item.type !== "limitSessions"} - > - - - -
- {item.limit !== null && item.limit > 0 && ( - - )} - {item.type === "limitDaily" && item.mode && ( -
- {item.mode === "fixed" ? t("modeFixed") : t("modeRolling")} - {item.mode === "fixed" && item.time && ` (${item.time})`} + + +
- )} -
- ))} + {item.limit !== null && item.limit > 0 && ( + + )} + {modelGroupOnlyAmt > 0 && ( +
+ {tQuota("modelGroupOnlyLabel")}: + + {formatCurrency(modelGroupOnlyAmt, currencyCode)} + +
+ )} + {item.type === "limitDaily" && item.mode && ( +
+ {item.mode === "fixed" ? t("modeFixed") : t("modeRolling")} + {item.mode === "fixed" && item.time && ` (${item.time})`} +
+ )} +
+ ); + })} )} diff --git a/src/app/[locale]/dashboard/_components/user/user-limit-badge.tsx b/src/app/[locale]/dashboard/_components/user/user-limit-badge.tsx index f922360ae..acd712360 100644 --- a/src/app/[locale]/dashboard/_components/user/user-limit-badge.tsx +++ b/src/app/[locale]/dashboard/_components/user/user-limit-badge.tsx @@ -201,11 +201,14 @@ export function UserLimitBadge({ const key = getLimitTypeKey(limitType); const typeData = usageData[key]; const usage = typeData?.usage ?? 0; + // group-rate-limit (§5.3/§10): gauge compares countedInGlobalUsage against limit. + // When model-group-only spend is non-zero, raw usage would exceed the global gate value. + const gaugeUsage = typeData?.countedInGlobalUsage ?? usage; // Calculate percentage - const percentage = formatPercentage(usage, limit); - const colorClass = getPercentageColor(usage, limit); - const statusText = `${formatValue(usage, unit)} / ${formatValue(limit, unit)}`; + const percentage = formatPercentage(gaugeUsage, limit); + const colorClass = getPercentageColor(gaugeUsage, limit); + const statusText = `${formatValue(gaugeUsage, unit)} / ${formatValue(limit, unit)}`; const percentBadge = ( + + ); } diff --git a/src/app/[locale]/dashboard/quotas/_components/quota-tabs-nav.tsx b/src/app/[locale]/dashboard/quotas/_components/quota-tabs-nav.tsx new file mode 100644 index 000000000..4487dff4c --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/_components/quota-tabs-nav.tsx @@ -0,0 +1,34 @@ +"use client"; + +import { useTranslations } from "next-intl"; +import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { Link, usePathname } from "@/i18n/routing"; + +const TABS = [ + { value: "users", href: "/dashboard/quotas/users", labelKey: "users" }, + { value: "model-limits", href: "/dashboard/quotas/model-limits", labelKey: "modelLimits" }, + { value: "providers", href: "/dashboard/quotas/providers", labelKey: "providers" }, +] as const; + +function resolveActiveValue(pathname: 
string): string { + const match = TABS.find((tab) => pathname === tab.href || pathname.startsWith(`${tab.href}/`)); + return match?.value ?? TABS[0].value; +} + +export function QuotaTabsNav() { + const pathname = usePathname(); + const t = useTranslations("quota.layout.tabs"); + const active = resolveActiveValue(pathname); + + return ( + + + {TABS.map((tab) => ( + + {t(tab.labelKey)} + + ))} + + + ); +} diff --git a/src/app/[locale]/dashboard/quotas/layout.tsx b/src/app/[locale]/dashboard/quotas/layout.tsx index 6e403256a..596dc887b 100644 --- a/src/app/[locale]/dashboard/quotas/layout.tsx +++ b/src/app/[locale]/dashboard/quotas/layout.tsx @@ -1,6 +1,5 @@ import { getTranslations } from "next-intl/server"; -import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; -import { Link } from "@/i18n/routing"; +import { QuotaTabsNav } from "./_components/quota-tabs-nav"; export default async function QuotasLayout({ children, @@ -19,18 +18,9 @@ export default async function QuotasLayout({

{t("description")}

- - - - {t("tabs.users")} - - - {t("tabs.providers")} - - + - {children} - +
{children}
); } diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/edit-model-limit-dialog.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/edit-model-limit-dialog.tsx new file mode 100644 index 000000000..fa5cfe6b4 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/edit-model-limit-dialog.tsx @@ -0,0 +1,400 @@ +"use client"; + +import { Loader2, Pencil } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { useMemo, useState, useTransition } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { + type ModelGroupLimitResponse, + type ModelGroupLimitUpsertInput, + upsertModelGroupLimit, +} from "@/lib/api-client/v1/actions/model-limits"; +import { CURRENCY_CONFIG, type CurrencyCode } from "@/lib/utils/currency"; +import { SearchableSelect } from "./searchable-select"; + +type SubjectType = "user" | "user_group" | "key"; +type ResetMode = "fixed" | "rolling"; + +export interface ModelLimitSelectableData { + users: { id: number; name: string }[]; + userGroups: { id: number; name: string }[]; + keys: { id: number; label: string }[]; + modelGroups: { id: number; name: string }[]; +} + +interface EditModelLimitDialogProps { + subjectType?: SubjectType; + subjectId?: number | null; + modelGroupId?: number | null; + modelGroupName?: string; + currencyCode?: CurrencyCode; + existing?: ModelGroupLimitResponse; + onSaved: () => void | Promise; + trigger?: React.ReactNode; + 
selectable?: ModelLimitSelectableData; +} + +function toInput(value: number | null | undefined): string { + if (value === null || value === undefined) return ""; + return String(value); +} + +function parseUsd(value: string): number | null { + const trimmed = value.trim(); + if (!trimmed) return null; + const parsed = Number.parseFloat(trimmed); + return Number.isFinite(parsed) ? parsed : null; +} + +export function EditModelLimitDialog({ + subjectType, + subjectId, + modelGroupId, + modelGroupName, + currencyCode = "USD", + existing, + onSaved, + trigger, + selectable, +}: EditModelLimitDialogProps) { + const t = useTranslations("quota.modelLimits.dialog"); + const tt = useTranslations("quota.modelLimits"); + const router = useRouter(); + const [open, setOpen] = useState(false); + const [isPending, startTransition] = useTransition(); + const isEdit = Boolean(existing); + const allowSelect = !isEdit && Boolean(selectable); + const currencySymbol = CURRENCY_CONFIG[currencyCode].symbol; + + const [selSubjectType, setSelSubjectType] = useState(subjectType ?? "user"); + const [selSubjectId, setSelSubjectId] = useState(subjectId ?? null); + const [selKeyValue, setSelKeyValue] = useState(""); + const [selModelGroupId, setSelModelGroupId] = useState(modelGroupId ?? null); + const [limit5h, setLimit5h] = useState(toInput(existing?.limit5hUsd)); + const [limitDaily, setLimitDaily] = useState(toInput(existing?.dailyLimitUsd)); + const [limitWeekly, setLimitWeekly] = useState(toInput(existing?.limitWeeklyUsd)); + const [limitMonthly, setLimitMonthly] = useState(toInput(existing?.limitMonthlyUsd)); + const [limitTotal, setLimitTotal] = useState(toInput(existing?.limitTotalUsd)); + const [resetMode, setResetMode] = useState(existing?.limit5hResetMode ?? 
"fixed"); + + const subjectLabel = useMemo(() => { + if (selSubjectType === "user") return tt("subjectTypeUser"); + if (selSubjectType === "user_group") return tt("subjectTypeUserGroup"); + return tt("subjectTypeKey"); + }, [selSubjectType, tt]); + + const subjectOptions = useMemo(() => { + if (!selectable) return []; + if (selSubjectType === "user") { + return selectable.users.map((u) => ({ value: String(u.id), label: u.name })); + } + if (selSubjectType === "user_group") { + return selectable.userGroups.map((g) => ({ value: String(g.id), label: g.name })); + } + return selectable.keys.map((k) => ({ value: String(k.id), label: k.label })); + }, [selectable, selSubjectType]); + + const modelGroupOptions = useMemo( + () => selectable?.modelGroups.map((g) => ({ value: String(g.id), label: g.name })) ?? [], + [selectable] + ); + + const effectiveModelGroupName = useMemo(() => { + if (!allowSelect) return modelGroupName ?? ""; + return selectable?.modelGroups.find((g) => g.id === selModelGroupId)?.name ?? ""; + }, [allowSelect, modelGroupName, selectable, selModelGroupId]); + + const resetForm = () => { + setSelSubjectType(subjectType ?? "user"); + setSelSubjectId(subjectId ?? null); + setSelKeyValue(""); + setSelModelGroupId(modelGroupId ?? null); + setLimit5h(toInput(existing?.limit5hUsd)); + setLimitDaily(toInput(existing?.dailyLimitUsd)); + setLimitWeekly(toInput(existing?.limitWeeklyUsd)); + setLimitMonthly(toInput(existing?.limitMonthlyUsd)); + setLimitTotal(toInput(existing?.limitTotalUsd)); + setResetMode(existing?.limit5hResetMode ?? "fixed"); + }; + + const handleOpenChange = (next: boolean) => { + if (next) resetForm(); + setOpen(next); + }; + + const handleSubjectTypeChange = (next: string) => { + setSelSubjectType(next as SubjectType); + setSelSubjectId(null); + setSelKeyValue(""); + }; + + const handleSubmit = (event: React.FormEvent) => { + event.preventDefault(); + + const finalSubjectType = allowSelect ? 
selSubjectType : subjectType; + const finalSubjectId = allowSelect ? selSubjectId : subjectId; + const finalModelGroupId = allowSelect ? selModelGroupId : modelGroupId; + + if (finalSubjectType === undefined) { + toast.error(t("subjectRequired")); + return; + } + if (finalSubjectType === "key") { + if (!selKeyValue.trim()) { + toast.error(t("subjectRequired")); + return; + } + } else if (finalSubjectId === null || finalSubjectId === undefined) { + toast.error(t("subjectRequired")); + return; + } + if (finalModelGroupId === null || finalModelGroupId === undefined) { + toast.error(t("modelGroupRequired")); + return; + } + + const limitFields = { + limit5hUsd: parseUsd(limit5h), + limit5hResetMode: resetMode, + dailyLimitUsd: parseUsd(limitDaily), + limitWeeklyUsd: parseUsd(limitWeekly), + limitMonthlyUsd: parseUsd(limitMonthly), + limitTotalUsd: parseUsd(limitTotal), + }; + + const body: ModelGroupLimitUpsertInput = + finalSubjectType === "key" + ? { + subjectType: finalSubjectType, + keyValue: selKeyValue.trim(), + modelGroupId: finalModelGroupId, + ...limitFields, + } + : { + subjectType: finalSubjectType, + subjectId: finalSubjectId!, + modelGroupId: finalModelGroupId, + ...limitFields, + }; + + startTransition(async () => { + const result = await upsertModelGroupLimit(body); + if (result.ok) { + toast.success(t("saveSuccess")); + setOpen(false); + await onSaved(); + router.refresh(); + } else { + toast.error(result.error ?? t("saveError")); + } + }); + }; + + return ( + + + {trigger ?? ( + + )} + + + + {isEdit ? t("editTitle") : t("addTitle")} + + {allowSelect && !effectiveModelGroupName + ? t("selectDescription") + : t("description", { group: effectiveModelGroupName })} + + + +
+ {allowSelect && ( +
+
+ + + + {tt("subjectTypeUser")} + {tt("subjectTypeUserGroup")} + {tt("subjectTypeKey")} + + +
+ +
+
+ + {selSubjectType === "key" ? ( + setSelKeyValue(e.target.value)} + placeholder="sk-..." + disabled={isPending} + className="h-9 font-mono text-xs" + /> + ) : ( + setSelSubjectId(v ? Number(v) : null)} + options={subjectOptions} + placeholder={t("subjectRequired")} + searchPlaceholder={tt("searchSubject")} + emptyText={tt("noResults")} + disabled={isPending} + className="w-full" + /> + )} +
+ +
+ + setSelModelGroupId(v ? Number(v) : null)} + options={modelGroupOptions} + placeholder={t("modelGroupRequired")} + searchPlaceholder={tt("searchModelGroup")} + emptyText={tt("noResults")} + disabled={isPending} + className="w-full" + /> +
+
+
+ )} + +
+
+ + setLimit5h(e.target.value)} + placeholder={t("unlimited")} + className="h-9" + /> +
+ +
+ + setLimitDaily(e.target.value)} + placeholder={t("unlimited")} + className="h-9" + /> +
+ +
+ + setLimitWeekly(e.target.value)} + placeholder={t("unlimited")} + className="h-9" + /> +
+ +
+ + setLimitMonthly(e.target.value)} + placeholder={t("unlimited")} + className="h-9" + /> +
+ +
+ + setLimitTotal(e.target.value)} + placeholder={t("unlimited")} + className="h-9" + /> +
+ +
+ + +
+
+ + + + +
+
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/model-combobox.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-combobox.tsx new file mode 100644 index 000000000..bd321bc85 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-combobox.tsx @@ -0,0 +1,138 @@ +"use client"; + +import { Check, ChevronsUpDown } from "lucide-react"; +import { useMemo, useState } from "react"; +import { Button } from "@/components/ui/button"; +import { + Command, + CommandGroup, + CommandInput, + CommandItem, + CommandList, +} from "@/components/ui/command"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { cn } from "@/lib/utils"; + +interface ModelComboboxProps { + value: string; + onChange: (value: string) => void; + availableModels: string[]; + existingModels?: string[]; + placeholder?: string; + searchPlaceholder?: string; + wildcardLabel?: string; + getCustomLabel?: (value: string) => string; + noResultsLabel?: string; + disabled?: boolean; +} + +export function ModelCombobox({ + value, + onChange, + availableModels, + existingModels = [], + placeholder = "", + searchPlaceholder, + wildcardLabel, + getCustomLabel, + noResultsLabel, + disabled = false, +}: ModelComboboxProps) { + const [open, setOpen] = useState(false); + const [search, setSearch] = useState(""); + + const trimmedSearch = search.trim(); + + const filteredModels = useMemo(() => { + const query = trimmedSearch.toLowerCase(); + return availableModels.filter( + (m) => m !== "*" && !existingModels.includes(m) && (!query || m.toLowerCase().includes(query)) + ); + }, [availableModels, existingModels, trimmedSearch]); + + const showWildcard = + !existingModels.includes("*") && + (!trimmedSearch || + "*".includes(trimmedSearch) || + "wildcard".includes(trimmedSearch.toLowerCase())); + + const showCustom = + trimmedSearch.length > 0 && + trimmedSearch !== "*" && + 
!availableModels.includes(trimmedSearch) && + !existingModels.includes(trimmedSearch); + + const hasContent = showWildcard || filteredModels.length > 0 || showCustom; + + const handleSelect = (selected: string) => { + onChange(selected); + setSearch(""); + setOpen(false); + }; + + const handleOpenChange = (next: boolean) => { + if (!next) setSearch(""); + setOpen(next); + }; + + return ( + + + + + + + + + {!hasContent && ( +

{noResultsLabel}

+ )} + {(showWildcard || filteredModels.length > 0) && ( + + {showWildcard && ( + handleSelect("*")}> + + * + {wildcardLabel && ( + {wildcardLabel} + )} + + )} + {filteredModels.map((model) => ( + handleSelect(model)}> + + {model} + + ))} + + )} + {showCustom && ( + + handleSelect(trimmedSearch)}> + {getCustomLabel ? getCustomLabel(trimmedSearch) : trimmedSearch} + + + )} +
+
+
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-client.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-client.tsx new file mode 100644 index 000000000..6e3ca9916 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-client.tsx @@ -0,0 +1,401 @@ +"use client"; + +import { ChevronRight, Loader2, Plus, Search, Trash2 } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { Fragment, useCallback, useEffect, useMemo, useState, useTransition } from "react"; +import { toast } from "sonner"; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from "@/components/ui/alert-dialog"; +import { Button } from "@/components/ui/button"; +import { Card, CardContent } from "@/components/ui/card"; +import { Input } from "@/components/ui/input"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { + deleteModelGroupLimit, + listModelGroupLimits, + type ModelGroupLimitResponse, +} from "@/lib/api-client/v1/actions/model-limits"; +import { cn } from "@/lib/utils"; +import { CURRENCY_CONFIG, type CurrencyCode } from "@/lib/utils/currency"; +import type { ModelGroupWithMembers } from "@/repository/model-group"; +import type { ModelGroupLimitRecord } from "@/repository/model-group-limit"; +import type { UserGroupRow } from "@/repository/user-group"; +import { EditModelLimitDialog } from "./edit-model-limit-dialog"; +import { QuotaBoostDialog } from "./quota-boost-dialog"; + +type SubjectType = "user" | "user_group" | "key"; + +export interface UserItem { + id: number; + name: string; +} + +interface ModelLimitsClientProps { + modelGroups: ModelGroupWithMembers[]; + userGroups: UserGroupRow[]; 
+ users: UserItem[]; + initialLimits: ModelGroupLimitRecord[]; + currencyCode?: string; + featureEnabled: boolean; + userGroupMembers: Record; + boostCounts: Record; +} + +/** Maps repository records to the API response shape, serializing limit5hCostResetAt to an ISO string. */ +function toDisplayLimits(records: ModelGroupLimitRecord[]): ModelGroupLimitResponse[] { + return records.map((r) => ({ + id: r.id, + subjectType: r.subjectType, + subjectId: r.subjectId, + modelGroupId: r.modelGroupId, + rpmLimit: r.rpmLimit, + limit5hUsd: r.limit5hUsd, + limit5hResetMode: r.limit5hResetMode, + dailyLimitUsd: r.dailyLimitUsd, + limitWeeklyUsd: r.limitWeeklyUsd, + limitMonthlyUsd: r.limitMonthlyUsd, + limitTotalUsd: r.limitTotalUsd, + limit5hCostResetAt: r.limit5hCostResetAt ? r.limit5hCostResetAt.toISOString() : null, + keyPreview: r.keyPreview ?? null, + })); +} + +export function ModelLimitsClient({ + modelGroups, + userGroups, + users, + initialLimits, + currencyCode = "USD", + featureEnabled: _featureEnabled, + userGroupMembers, + boostCounts, +}: ModelLimitsClientProps) { + const t = useTranslations("quota.modelLimits"); + const router = useRouter(); + const currencySymbol = CURRENCY_CONFIG[currencyCode as CurrencyCode]?.symbol ?? "$"; + + const [search, setSearch] = useState(""); + const [expandedGroups, setExpandedGroups] = useState>(new Set()); + const [limits, setLimits] = useState(() => + toDisplayLimits(initialLimits) + ); + const [loading, setLoading] = useState(false); + const [, startTransition] = useTransition(); + + const selectableData = useMemo( + () => ({ + users: users.map((u) => ({ id: u.id, name: u.name })), + userGroups: userGroups.map((g) => ({ id: g.id, name: g.name ?? g.tag })), + keys: [], + modelGroups: modelGroups.map((g) => ({ id: g.id, name: g.name })), + }), + [users, userGroups, modelGroups] + ); + + // Reloads the limit rows from the API. On failure the rows already shown are kept (only a + // toast is raised) so a transient error does not flip the table to the "no data" state. + const loadLimits = useCallback(async () => { + setLoading(true); + try { + const result = await listModelGroupLimits({}); + if (result.ok) { + setLimits(result.data); + } else { + toast.error(result.error ??
t("loadError")); + } + } finally { + // Always clear the spinner, even if the request itself rejects. + setLoading(false); + } + }, [t]); + + useEffect(() => { + void loadLimits(); + }, [loadLimits]); + + const handleDelete = (id: number) => { + startTransition(async () => { + const result = await deleteModelGroupLimit(id); + if (result.ok) { + toast.success(t("deleteSuccess")); + await loadLimits(); + router.refresh(); + } else { + toast.error(result.error ?? t("deleteError")); + } + }); + }; + + // Display name for the limit's subject; falls back to the numeric id when no match is found. + const resolveSubjectName = useCallback( + (limit: ModelGroupLimitResponse): string => { + if (limit.subjectType === "user") { + return users.find((u) => u.id === limit.subjectId)?.name ?? String(limit.subjectId); + } + if (limit.subjectType === "user_group") { + const g = userGroups.find((g) => g.id === limit.subjectId); + return g?.name ?? g?.tag ?? String(limit.subjectId); + } + if (limit.subjectType === "key") { + return limit.keyPreview ?? String(limit.subjectId); + } + return String(limit.subjectId); + }, + [users, userGroups] + ); + + const resolveGroupName = useCallback( + (limit: ModelGroupLimitResponse): string => + modelGroups.find((g) => g.id === limit.modelGroupId)?.name ?? String(limit.modelGroupId), + [modelGroups] + ); + + const subjectTypeLabel = useCallback( + (subjectType: string): string => { + if (subjectType === "user") return t("subjectTypeUser"); + if (subjectType === "user_group") return t("subjectTypeUserGroup"); + return t("subjectTypeKey"); + }, + [t] + ); + + // Case-insensitive substring match against subject name, model-group name and subject-type label. + const filteredLimits = useMemo(() => { + const query = search.trim().toLowerCase(); + if (!query) return limits; + return limits.filter((limit) => { + const subject = resolveSubjectName(limit).toLowerCase(); + const group = resolveGroupName(limit).toLowerCase(); + const type = subjectTypeLabel(limit.subjectType).toLowerCase(); + return subject.includes(query) || group.includes(query) || type.includes(query); + }); + }, [limits, search, resolveSubjectName, resolveGroupName, subjectTypeLabel]); + + const formatUsd = (value: number | null) => + value === null ?
"—" : `${currencySymbol}${value.toFixed(2)}`; + + const boostCountFor = useCallback( + (userId: number, modelGroupId: number) => boostCounts[`${userId}:${modelGroupId}`] ?? 0, + [boostCounts] + ); + + const toggleGroup = (id: number) => { + setExpandedGroups((prev) => { + const next = new Set(prev); + if (next.has(id)) { + next.delete(id); + } else { + next.add(id); + } + return next; + }); + }; + + return ( +
+
+
+ + setSearch(e.target.value)} + placeholder={t("searchLimits")} + className="h-9 pl-8" + /> +
+ + + + {t("addLimit")} + + } + /> +
+ + + + {loading ? ( +
+ +
+ ) : limits.length === 0 ? ( +

{t("noData")}

+ ) : filteredLimits.length === 0 ? ( +

{t("noResults")}

+ ) : ( + + + + {t("table.subjectType")} + {t("table.subject")} + {t("table.modelGroup")} + {t("table.fiveHour")} + {t("table.daily")} + {t("table.weekly")} + {t("table.monthly")} + {t("table.total")} + {t("table.resetMode")} + {t("table.actions")} + + + + {filteredLimits.map((limit) => { + const members = + limit.subjectType === "user_group" + ? (userGroupMembers[limit.subjectId] ?? []) + : []; + const expandable = members.length > 0; + const expanded = expandedGroups.has(limit.id); + const groupName = resolveGroupName(limit); + + return ( + + + +
+ {expandable ? ( + + ) : ( + + )} + {subjectTypeLabel(limit.subjectType)} +
+
+ {resolveSubjectName(limit)} + {groupName} + {formatUsd(limit.limit5hUsd)} + + {formatUsd(limit.dailyLimitUsd)} + + + {formatUsd(limit.limitWeeklyUsd)} + + + {formatUsd(limit.limitMonthlyUsd)} + + + {formatUsd(limit.limitTotalUsd)} + + {t(`resetMode.${limit.limit5hResetMode}`)} + +
+ {limit.subjectType === "user" && ( + router.refresh()} + /> + )} + + + + + + + + {t("deleteConfirm.title")} + + {t("deleteConfirm.description", { group: groupName })} + + + + {t("deleteConfirm.cancel")} + handleDelete(limit.id)}> + {t("deleteConfirm.confirm")} + + + + +
+
+
+ + {expanded && + members.map((member) => ( + + + + {member.name} + + + {groupName} + + + + + + + + +
+ router.refresh()} + /> +
+
+
+ ))} +
+ ); + })} +
+
+ )} +
+
+ + + +

{t("semanticsNote")}

+
+
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-sub-nav.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-sub-nav.tsx new file mode 100644 index 000000000..328048d40 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/model-limits-sub-nav.tsx @@ -0,0 +1,45 @@ +"use client"; + +import { useTranslations } from "next-intl"; +import { Tabs, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { Link, usePathname } from "@/i18n/routing"; + +const SUB_TABS = [ + { value: "config", href: "/dashboard/quotas/model-limits", labelKey: "config" }, + { + value: "model-groups", + href: "/dashboard/quotas/model-limits/model-groups", + labelKey: "modelGroups", + }, + { + value: "user-groups", + href: "/dashboard/quotas/model-limits/user-groups", + labelKey: "userGroups", + }, +] as const; + +function resolveActiveValue(pathname: string): string { + const child = SUB_TABS.find( + (tab) => + tab.value !== "config" && (pathname === tab.href || pathname.startsWith(`${tab.href}/`)) + ); + return child?.value ?? 
"config"; +} + +export function ModelLimitsSubNav() { + const pathname = usePathname(); + const t = useTranslations("quota.layout.subTabs"); + const active = resolveActiveValue(pathname); + + return ( + + + {SUB_TABS.map((tab) => ( + + {t(tab.labelKey)} + + ))} + + + ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-dialog.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-dialog.tsx new file mode 100644 index 000000000..0817a042d --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-dialog.tsx @@ -0,0 +1,82 @@ +"use client"; + +import { Zap } from "lucide-react"; +import { useTranslations } from "next-intl"; +import { useState } from "react"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { cn } from "@/lib/utils"; +import type { CurrencyCode } from "@/lib/utils/currency"; +import { QuotaBoostPanel } from "./quota-boost-panel"; + +interface QuotaBoostDialogProps { + userId: number; + userName: string; + modelGroupId: number; + modelGroupName: string; + currencyCode?: CurrencyCode; + boostCount?: number; + onChanged?: () => void; +} + +export function QuotaBoostDialog({ + userId, + userName, + modelGroupId, + modelGroupName, + currencyCode = "USD", + boostCount = 0, + onChanged, +}: QuotaBoostDialogProps) { + const t = useTranslations("quota.modelLimits"); + const [open, setOpen] = useState(false); + + const hasBoost = boostCount > 0; + const triggerTitle = hasBoost + ? 
t("boosts.configuredCount", { count: boostCount }) + : t("boosts.panelTitle"); + + return ( + + + + + + + {t("boosts.panelTitle")} + + {t("boosts.dialogDescription", { user: userName, group: modelGroupName })} + + + + + + ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-panel.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-panel.tsx new file mode 100644 index 000000000..dc699f6c4 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/quota-boost-panel.tsx @@ -0,0 +1,301 @@ +"use client"; + +import { Loader2, Plus, Trash2 } from "lucide-react"; +import { useTranslations } from "next-intl"; +import { useCallback, useEffect, useState, useTransition } from "react"; +import { toast } from "sonner"; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from "@/components/ui/alert-dialog"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { + createQuotaBoost, + deleteQuotaBoost, + listQuotaBoosts, + type QuotaBoostGrantResponse, +} from "@/lib/api-client/v1/actions/quota-boosts"; +import type { CurrencyCode } from "@/lib/utils/currency"; +import { CURRENCY_CONFIG } from "@/lib/utils/currency"; + +type BoostWindow = "5h" | "daily" | "weekly" | "monthly" | "total"; + +const BOOST_WINDOWS: BoostWindow[] = ["5h", "daily", "weekly", "monthly", "total"]; + +interface QuotaBoostPanelProps { + userId: number; + userName: string; + modelGroupId: number; + modelGroupName: string; + currencyCode?: CurrencyCode; 
+ onChanged?: () => void; +} + +export function QuotaBoostPanel({ + userId, + modelGroupId, + currencyCode = "USD", + onChanged, +}: QuotaBoostPanelProps) { + const t = useTranslations("quota.quotaBoosts"); + const currencySymbol = CURRENCY_CONFIG[currencyCode]?.symbol ?? "$"; + + const [boosts, setBoosts] = useState([]); + const [loading, setLoading] = useState(false); + const [, startTransition] = useTransition(); + + const [window, setWindow] = useState("daily"); + const [amount, setAmount] = useState(""); + const [validFrom, setValidFrom] = useState(""); + const [validTo, setValidTo] = useState(""); + const [note, setNote] = useState(""); + const [submitting, setSubmitting] = useState(false); + + // Fetches the grants for this (user, model group) pair; a failed response only raises a toast. + const loadBoosts = useCallback(async () => { + setLoading(true); + try { + const result = await listQuotaBoosts({ userId, modelGroupId }); + if (result.ok) { + setBoosts(result.data); + } else { + toast.error(result.error ?? t("errors.list_failed")); + } + } finally { + // Always clear the spinner, even if the request itself rejects. + setLoading(false); + } + }, [userId, modelGroupId, t]); + + useEffect(() => { + void loadBoosts(); + }, [loadBoosts]); + + // Validates the form, creates the grant, then clears the form and reloads the list on success. + const handleCreate = async (event: React.FormEvent) => { + event.preventDefault(); + const parsedAmount = Number.parseFloat(amount); + if (!Number.isFinite(parsedAmount) || parsedAmount <= 0) { + toast.error(t("invalidAmount")); + return; + } + if (!validFrom || !validTo) { + toast.error(t("validityRequired")); + return; + } + const fromDate = new Date(validFrom); + const toDate = new Date(validTo); + // An unparseable value yields an Invalid Date: every comparison against it is false and + // toISOString() would throw, so reject it together with an empty or inverted range. + if ( + Number.isNaN(fromDate.getTime()) || + Number.isNaN(toDate.getTime()) || + toDate <= fromDate + ) { + toast.error(t("errors.invalid_validity_range")); + return; + } + + setSubmitting(true); + // The datetime-local input yields a zoneless "YYYY-MM-DDTHH:mm" string, but the API + // requires ISO 8601 with a timezone offset. Interpret it as local wall-clock time and + // serialize to a UTC instant so the grant activates at the moment the admin picked.
+ const result = await createQuotaBoost({ + userId, + modelGroupId, + window, + amountUsd: parsedAmount, + validFrom: fromDate.toISOString(), + validTo: toDate.toISOString(), + note: note.trim() || null, + // Reset the submitting flag whether the request resolves or rejects. + }).finally(() => setSubmitting(false)); + + if (result.ok) { + toast.success(t("createSuccess")); + setAmount(""); + setValidFrom(""); + setValidTo(""); + setNote(""); + await loadBoosts(); + onChanged?.(); + } else { + toast.error(result.error ?? t("errors.create_failed")); + } + }; + + const handleDelete = (id: number) => { + startTransition(async () => { + const result = await deleteQuotaBoost(id); + if (result.ok) { + toast.success(t("deleteSuccess")); + await loadBoosts(); + onChanged?.(); + } else { + toast.error(result.error ?? t("errors.delete_failed")); + } + }); + }; + + // Formats a timestamp for display. new Date() never throws on bad input (it yields an + // Invalid Date), so detect that explicitly and fall back to the raw string. + const formatDate = (iso: string) => { + const parsed = new Date(iso); + return Number.isNaN(parsed.getTime()) ? iso : parsed.toLocaleString(); + }; + + return ( +
+

{t("note")}

+ +
+
+ + +
+ +
+ + setAmount(e.target.value)} + placeholder="0.00" + className="h-9" + required + /> +
+ +
+ + setValidFrom(e.target.value)} + className="h-9" + required + /> +
+ +
+ + setValidTo(e.target.value)} + className="h-9" + required + /> +
+ +
+ + setNote(e.target.value)} + placeholder={t("notePlaceholder")} + className="h-9" + maxLength={500} + /> +
+ +
+ +
+
+ + {loading ? ( +
+ +
+ ) : boosts.length === 0 ? ( +

{t("noBoosts")}

+ ) : ( + + + + {t("table.window")} + {t("table.amount")} + {t("table.validFrom")} + {t("table.validTo")} + {t("table.note")} + {t("table.actions")} + + + + {boosts.map((boost) => ( + + {t(`window_${boost.window}`)} + + {currencySymbol} + {Number(boost.amountUsd).toFixed(2)} + + {formatDate(boost.validFrom)} + {formatDate(boost.validTo)} + {boost.note ?? "—"} + +
+ + + + + + + {t("revokeConfirm.title")} + + {t("revokeConfirm.description")} + + + + {t("revokeConfirm.cancel")} + handleDelete(boost.id)}> + {t("revokeConfirm.confirm")} + + + + +
+
+
+ ))} +
+
+ )} +
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/_components/searchable-select.tsx b/src/app/[locale]/dashboard/quotas/model-limits/_components/searchable-select.tsx new file mode 100644 index 000000000..e3873cf19 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/_components/searchable-select.tsx @@ -0,0 +1,94 @@ +"use client"; + +import { Check, ChevronsUpDown } from "lucide-react"; +import { useState } from "react"; +import { Button } from "@/components/ui/button"; +import { + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, +} from "@/components/ui/command"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { cn } from "@/lib/utils"; + +export interface SearchableSelectOption { + value: string; + label: string; +} + +interface SearchableSelectProps { + value: string; + onValueChange: (value: string) => void; + options: SearchableSelectOption[]; + placeholder?: string; + searchPlaceholder?: string; + emptyText?: string; + disabled?: boolean; + className?: string; +} + +export function SearchableSelect({ + value, + onValueChange, + options, + placeholder, + searchPlaceholder = "Search...", + emptyText = "No results.", + disabled = false, + className, +}: SearchableSelectProps) { + const [open, setOpen] = useState(false); + + const selectedOption = options.find((opt) => opt.value === value); + const selectedLabel = selectedOption?.label ?? placeholder ?? 
value; + + return ( + + + + + + + + + {emptyText} + + {options.map((opt) => ( + { + onValueChange(opt.value); + setOpen(false); + }} + > + + {opt.label} + + ))} + + + + + + ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/layout.tsx b/src/app/[locale]/dashboard/quotas/model-limits/layout.tsx new file mode 100644 index 000000000..dbfbaef6d --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/layout.tsx @@ -0,0 +1,10 @@ +import { ModelLimitsSubNav } from "./_components/model-limits-sub-nav"; + +export default function ModelLimitsLayout({ children }: { children: React.ReactNode }) { + return ( +
+ +
{children}
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/create-model-group-dialog.tsx b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/create-model-group-dialog.tsx new file mode 100644 index 000000000..e7845031a --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/create-model-group-dialog.tsx @@ -0,0 +1,151 @@ +"use client"; + +import { Loader2 } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { useState, useTransition } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { addModelGroupMember, createModelGroup } from "@/lib/api-client/v1/actions/model-groups"; +import { ModelMembersSelect } from "./model-members-select"; + +interface CreateModelGroupDialogProps { + availableModels: string[]; + onSaved: () => void | Promise; + trigger?: React.ReactNode; +} + +export function CreateModelGroupDialog({ + availableModels, + onSaved, + trigger, +}: CreateModelGroupDialogProps) { + const t = useTranslations("quota.modelGroups"); + const router = useRouter(); + const [open, setOpen] = useState(false); + const [isPending, startTransition] = useTransition(); + + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [members, setMembers] = useState([]); + + const resetForm = () => { + setName(""); + setDescription(""); + setMembers([]); + }; + + const handleOpenChange = (next: boolean) => { + if (next) resetForm(); + setOpen(next); + }; + + const handleSubmit = () => { + startTransition(async () => { + if (!name.trim()) { + toast.error(t("nameRequired")); + 
return; + } + + const result = await createModelGroup({ + name: name.trim(), + description: description.trim() || null, + }); + if (!result.ok) { + toast.error(result.error || t("createError")); + return; + } + + const errors: string[] = []; + for (const model of members) { + const r = await addModelGroupMember(result.data.id, model); + if (!r.ok) errors.push(`${model}: ${r.error}`); + } + + if (errors.length > 0) { + toast.error(t("memberUpdatePartialError", { errors: errors.join("; ") })); + } else { + toast.success(t("createSuccess")); + } + + setOpen(false); + await onSaved(); + router.refresh(); + }); + }; + + return ( + + {trigger ? ( + {trigger} + ) : ( + + + + )} + + + {t("dialog.addTitle")} + {t("dialog.addDescription")} + +
+
+ + setName(e.target.value)} + placeholder={t("dialog.namePlaceholder")} + disabled={isPending} + /> +
+
+ + setDescription(e.target.value)} + placeholder={t("dialog.descriptionPlaceholder")} + disabled={isPending} + /> +
+
+ + +

{t("dialog.memberConflictNote")}

+
+
+ + + + +
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/manage-members-dialog.tsx b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/manage-members-dialog.tsx new file mode 100644 index 000000000..32e95f30f --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/manage-members-dialog.tsx @@ -0,0 +1,172 @@ +"use client"; + +import { Loader2, Settings2 } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { useState, useTransition } from "react"; +import { toast } from "sonner"; +import { Button } from "@/components/ui/button"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + addModelGroupMember, + removeModelGroupMember, + updateModelGroup, +} from "@/lib/api-client/v1/actions/model-groups"; +import { ModelMembersSelect } from "./model-members-select"; + +interface GroupItem { + id: number; + name: string; + description: string | null; + members: string[]; +} + +interface ManageMembersDialogProps { + group: GroupItem; + availableModels: string[]; + onSaved: () => void | Promise; +} + +export function ManageMembersDialog({ group, availableModels, onSaved }: ManageMembersDialogProps) { + const t = useTranslations("quota.modelGroups"); + const router = useRouter(); + const [open, setOpen] = useState(false); + const [isPending, startTransition] = useTransition(); + + const [name, setName] = useState(group.name); + const [description, setDescription] = useState(group.description ?? ""); + const [members, setMembers] = useState(group.members); + + const resetForm = () => { + setName(group.name); + setDescription(group.description ?? 
""); + setMembers(group.members); + }; + + const handleOpenChange = (next: boolean) => { + if (next) resetForm(); + setOpen(next); + }; + + const handleSave = () => { + startTransition(async () => { + if (!name.trim()) { + toast.error(t("nameRequired")); + return; + } + + const metaResult = await updateModelGroup(group.id, { + name: name.trim(), + description: description.trim() || null, + }); + if (!metaResult.ok) { + toast.error(metaResult.error || t("updateError")); + return; + } + + const originalMembers = new Set(group.members); + const nextMembers = new Set(members); + + const toAdd = members.filter((m) => !originalMembers.has(m)); + const toRemove = group.members.filter((m) => !nextMembers.has(m)); + + const errors: string[] = []; + + for (const model of toAdd) { + const r = await addModelGroupMember(group.id, model); + if (!r.ok) errors.push(`${model}: ${r.error}`); + } + + for (const model of toRemove) { + const r = await removeModelGroupMember(group.id, model); + if (!r.ok) errors.push(`${model}: ${r.error}`); + } + + if (errors.length > 0) { + toast.error(t("memberUpdatePartialError", { errors: errors.join("; ") })); + } else { + toast.success(t("updateSuccess")); + } + + setOpen(false); + await onSaved(); + router.refresh(); + }); + }; + + return ( + + + + + + + {t("dialog.manageTitle", { name: group.name })} + {t("dialog.manageDescription")} + + +
+
+ + setName(e.target.value)} + placeholder={t("dialog.namePlaceholder")} + disabled={isPending} + /> +
+ +
+ + setDescription(e.target.value)} + placeholder={t("dialog.descriptionPlaceholder")} + disabled={isPending} + /> +
+ +
+ + +

{t("dialog.memberConflictNote")}

+
+
+ + + + + +
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-group-client.tsx b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-group-client.tsx new file mode 100644 index 000000000..1c96e332b --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-group-client.tsx @@ -0,0 +1,195 @@ +"use client"; + +import { Loader2, Plus, Trash2 } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { useCallback, useEffect, useState, useTransition } from "react"; +import { toast } from "sonner"; +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, + AlertDialogTrigger, +} from "@/components/ui/alert-dialog"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Card, CardContent } from "@/components/ui/card"; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { + deleteModelGroup, + listModelGroups, + type ModelGroupResponse, +} from "@/lib/api-client/v1/actions/model-groups"; +import { CreateModelGroupDialog } from "./create-model-group-dialog"; +import { ManageMembersDialog } from "./manage-members-dialog"; + +type GroupItem = Pick< + ModelGroupResponse, + "id" | "name" | "description" | "isSingleton" | "members" +>; + +interface ModelGroupClientProps { + initialGroups: GroupItem[]; + availableModels: string[]; +} + +export function ModelGroupClient({ initialGroups, availableModels }: ModelGroupClientProps) { + const t = useTranslations("quota.modelGroups"); + const router = useRouter(); + const [groups, setGroups] = useState(initialGroups); + const [loading, setLoading] = useState(false); + const [, startTransition] = useTransition(); + + const 
loadGroups = useCallback(async () => {
+    // Pull the current group list from the API; `loading` is held true for the whole call.
+    setLoading(true);
+    try {
+      const response = await listModelGroups();
+      if (!response.ok) {
+        toast.error(response.error || t("loadError"));
+        return;
+      }
+      setGroups(response.data);
+    } finally {
+      // Runs on success, on the early return above, and on a thrown error alike.
+      setLoading(false);
+    }
+  }, [t]);
+
+  // Re-seed local state whenever the `initialGroups` prop changes.
+  useEffect(() => {
+    setGroups(initialGroups);
+  }, [initialGroups]);
+
+  // Delete one group inside a transition; on success: toast, reload the list, then refresh the route.
+  const handleDelete = (id: number, name: string) => {
+    startTransition(async () => {
+      const outcome = await deleteModelGroup(id);
+      if (!outcome.ok) {
+        toast.error(outcome.error || t("deleteError"));
+        return;
+      }
+      toast.success(t("deleteSuccess", { name }));
+      await loadGroups();
+      router.refresh();
+    });
+  };
+
+  return (
+
+
+

{t("totalCount", { count: groups.length })}

+ + + {t("addGroup")} + + } + /> +
+ + + + {loading ? ( +
+ +
+ ) : groups.length === 0 ? ( +

{t("noData")}

+ ) : ( + + + + {t("table.name")} + {t("table.description")} + {t("table.members")} + {t("table.type")} + {t("table.actions")} + + + + {groups.map((group) => ( + + {group.name} + + {group.description ?? "—"} + + +
+ {group.members.length === 0 ? ( + {t("noMembers")} + ) : ( + group.members.slice(0, 3).map((model) => ( + + {model} + + )) + )} + {group.members.length > 3 && ( + + +{group.members.length - 3} + + )} +
+
+ + {group.members.length === 1 ? ( + {t("singleton")} + ) : ( + {t("group")} + )} + + +
+ + + + + + + + {t("deleteConfirm.title")} + + {t("deleteConfirm.description", { name: group.name })} + + + + {t("deleteConfirm.cancel")} + handleDelete(group.id, group.name)} + className="bg-destructive text-destructive-foreground hover:bg-destructive/90" + > + {t("deleteConfirm.confirm")} + + + + +
+
+
+ ))} +
+
+ )} +
+
+
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-members-select.tsx b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-members-select.tsx new file mode 100644 index 000000000..2fcb7c1f1 --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/_components/model-members-select.tsx @@ -0,0 +1,157 @@ +"use client"; + +import { Check, ChevronsUpDown, X } from "lucide-react"; +import { useMemo, useState } from "react"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { + Command, + CommandGroup, + CommandInput, + CommandItem, + CommandList, +} from "@/components/ui/command"; +import { Popover, PopoverContent, PopoverTrigger } from "@/components/ui/popover"; +import { cn } from "@/lib/utils"; + +interface ModelMembersSelectProps { + value: string[]; + onChange: (next: string[]) => void; + availableModels: string[]; + placeholder?: string; + searchPlaceholder?: string; + noResultsLabel?: string; + selectedLabel?: string; + disabled?: boolean; +} + +export function ModelMembersSelect({ + value, + onChange, + availableModels, + placeholder = "", + searchPlaceholder, + noResultsLabel, + selectedLabel, + disabled = false, +}: ModelMembersSelectProps) { + const [open, setOpen] = useState(false); + const [search, setSearch] = useState(""); + + const trimmedSearch = search.trim(); + + const filteredModels = useMemo(() => { + const query = trimmedSearch.toLowerCase(); + return availableModels.filter((m) => !query || m.toLowerCase().includes(query)); + }, [availableModels, trimmedSearch]); + + const showCustom = + trimmedSearch.length > 0 && + !availableModels.some((m) => m.toLowerCase() === trimmedSearch.toLowerCase()); + + const hasContent = filteredModels.length > 0 || showCustom; + + const toggle = (model: string) => { + onChange(value.includes(model) ? 
value.filter((m) => m !== model) : [...value, model]);
+    // Every pick (add or remove) also clears the search text.
+    setSearch("");
+  };
+
+  // Drop a single model from the selection; the search text is left as it is.
+  const remove = (model: string) => {
+    const remaining = value.filter((entry) => entry !== model);
+    onChange(remaining);
+  };
+
+  // Track the open flag; a transition to closed also discards the search text.
+  const handleOpenChange = (next: boolean) => {
+    if (!next) {
+      setSearch("");
+    }
+    setOpen(next);
+  };
+
+  // Caption: the placeholder while nothing is selected, otherwise the caller's label or the bare count.
+  const triggerText =
+    value.length > 0 ? (selectedLabel ?? String(value.length)) : placeholder;
+
+  return (
+
+ + + + + e.stopPropagation()} + onTouchMove={(e) => e.stopPropagation()} + > + + + + {!hasContent && ( +

{noResultsLabel}

+ )} + {filteredModels.length > 0 && ( + + {filteredModels.map((model) => ( + toggle(model)}> + + {model} + + ))} + + )} + {showCustom && ( + + toggle(trimmedSearch)}> + + {trimmedSearch} + + + )} +
+
+
+
+ + {value.length > 0 && ( +
+ {value.map((model) => ( + + {model} + + + ))} +
+ )} +
+  );
+}
diff --git a/src/app/[locale]/dashboard/quotas/model-limits/model-groups/page.tsx b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/page.tsx
new file mode 100644
index 000000000..ab9e26aef
--- /dev/null
+++ b/src/app/[locale]/dashboard/quotas/model-limits/model-groups/page.tsx
@@ -0,0 +1,42 @@
+import { Info } from "lucide-react";
+import { getTranslations } from "next-intl/server";
+import { Alert, AlertDescription } from "@/components/ui/alert";
+import { redirect } from "@/i18n/routing";
+import { getSession } from "@/lib/auth";
+import { listModelGroups } from "@/repository/model-group";
+import { findAllProviderSupportedModels } from "@/repository/provider";
+import { ModelGroupClient } from "./_components/model-group-client";
+
+export const dynamic = "force-dynamic"; // Next.js route segment option: render this page per request instead of statically
+
+export default async function ModelGroupsPage({ params }: { params: Promise<{ locale: string }> }) { // Admin-only page: checks the session, then loads model groups and provider-supported models
+  const { locale } = await params; // `params` is a Promise here and must be awaited before reading `locale`
+  const session = await getSession();
+
+  if (!session || session.user.role !== "admin") { // gate: anything other than an admin session is redirected away
+    return redirect({ href: session ? "/dashboard/my-quota" : "/login", locale }); // signed-in non-admins go to their own quota page, visitors without a session go to login
+  }
+
+  const t = await getTranslations({ locale, namespace: "quota.modelGroups" }); // translator bound to this page's message namespace
+
+  const [groups, availableModels] = await Promise.all([ // both reads are started together and awaited as a pair
+    listModelGroups(),
+    findAllProviderSupportedModels(),
+  ]);
+
+  return (
+
+
+

{t("title")}

+

{t("description")}

+
+ + + + {t("semanticsNote")} + + + +
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/page.tsx b/src/app/[locale]/dashboard/quotas/model-limits/page.tsx new file mode 100644 index 000000000..59e46e60b --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/page.tsx @@ -0,0 +1,99 @@ +import { Info } from "lucide-react"; +import { getTranslations } from "next-intl/server"; +import { Alert, AlertDescription } from "@/components/ui/alert"; +import { redirect } from "@/i18n/routing"; +import { getSession } from "@/lib/auth"; +import { isModelRateLimitEnabled } from "@/lib/model-rate-limit/types"; +import type { ModelGroupWithMembers } from "@/repository/model-group"; +import { listModelGroups } from "@/repository/model-group"; +import type { ModelGroupLimitRecord } from "@/repository/model-group-limit"; +import { listModelGroupLimits } from "@/repository/model-group-limit"; +import { listAllActiveAndFutureGrants } from "@/repository/quota-boost"; +import { getSystemSettings } from "@/repository/system-config"; +import { searchUsersForFilter } from "@/repository/user"; +import type { UserGroupRow } from "@/repository/user-group"; +import { listUserGroupMembers, listUserGroups } from "@/repository/user-group"; +import { ModelLimitsClient } from "./_components/model-limits-client"; + +export const dynamic = "force-dynamic"; + +export interface UserItem { + id: number; + name: string; +} + +export interface PageInitialData { + modelGroups: ModelGroupWithMembers[]; + userGroups: UserGroupRow[]; + users: UserItem[]; + initialLimits: ModelGroupLimitRecord[]; + currencyCode: string; + featureEnabled: boolean; + userGroupMembers: Record; + boostCounts: Record; +} + +export default async function ModelLimitsPage({ params }: { params: Promise<{ locale: string }> }) { + const { locale } = await params; + const session = await getSession(); + + if (!session || session.user.role !== "admin") { + return redirect({ href: session ? 
"/dashboard/my-quota" : "/login", locale }); // signed-in non-admins go to their own quota page, visitors without a session go to login
+  }
+
+  const t = await getTranslations({ locale, namespace: "quota.modelLimits" }); // translator bound to this page's message namespace
+
+  const [modelGroups, userGroups, users, initialLimits, systemSettings, boostGrants] = // six independent reads, started together
+    await Promise.all([
+      listModelGroups(),
+      listUserGroups(),
+      searchUsersForFilter(undefined, 2000), // NOTE(review): 2000 looks like a result cap — confirm installations with more users are handled
+      listModelGroupLimits({}), // empty filter object; presumably returns every limit row — verify against the repository
+      getSystemSettings(),
+      listAllActiveAndFutureGrants(),
+    ]);
+
+  const featureEnabled = isModelRateLimitEnabled();
+
+  const boostCounts: Record = {}; // NOTE(review): Record has no type arguments in this copy — confirm against the real file
+  for (const grant of boostGrants) { // tally how many grants exist per (user, model group) pair
+    const key = `${grant.userId}:${grant.modelGroupId}`; // composite "userId:modelGroupId" key
+    boostCounts[key] = (boostCounts[key] ?? 0) + 1; // first occurrence starts from 0
+  }
+
+  const memberRows = await listUserGroupMembers(userGroups.map((g) => g.tag)); // members are looked up by group tag; runs after the batch above because it needs userGroups
+  const tagToGroupId = new Map(userGroups.map((g) => [g.tag, g.id])); // tag -> group id, used to re-key member rows
+  const userGroupMembers: Record = {}; // group id -> list of { id, name } members
+  for (const member of memberRows) {
+    const groupId = tagToGroupId.get(member.tag);
+    if (groupId === undefined) continue; // row's tag matches no loaded user group: skip it
+    (userGroupMembers[groupId] ??= []).push({ id: member.userId, name: member.userName }); // create the bucket on first use, then append
+  }
+
+  return (
+
+
+

{t("title")}

+

{t("description")}

+
+ + {!featureEnabled && ( + + + {t("disabledNotice")} + + )} + + +
+ ); +} diff --git a/src/app/[locale]/dashboard/quotas/model-limits/user-groups/_components/user-group-client.tsx b/src/app/[locale]/dashboard/quotas/model-limits/user-groups/_components/user-group-client.tsx new file mode 100644 index 000000000..1a54ab76d --- /dev/null +++ b/src/app/[locale]/dashboard/quotas/model-limits/user-groups/_components/user-group-client.tsx @@ -0,0 +1,335 @@ +"use client"; + +import { Pencil, Trash2, Users } from "lucide-react"; +import { useRouter } from "next/navigation"; +import { useTranslations } from "next-intl"; +import { useState } from "react"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Textarea } from "@/components/ui/textarea"; +import { + createUserGroup, + deleteUserGroup, + updateUserGroup, +} from "@/lib/api-client/v1/actions/user-groups"; + +interface GroupItem { + id: number; + tag: string; + name: string | null; + description: string | null; + memberCount?: number | null; +} + +interface UserGroupClientProps { + groups: GroupItem[]; + availableTags: string[]; +} + +const UNREGISTERED_SENTINEL = "__unregistered__"; + +export function UserGroupClient({ groups, availableTags }: UserGroupClientProps) { + const t = useTranslations("quota.userGroups"); + const router = useRouter(); + + const [createOpen, setCreateOpen] = useState(false); + const [editGroup, setEditGroup] = useState(null); + const [deleteGroup, setDeleteGroup] = useState(null); + + const [selectedTag, setSelectedTag] = useState(""); + const [formName, setFormName] = 
useState(""); + const [formDescription, setFormDescription] = useState(""); + const [submitting, setSubmitting] = useState(false); + const [error, setError] = useState(null); + + const registeredTags = new Set(groups.map((g) => g.tag)); + const unregisteredTags = availableTags.filter((tag) => !registeredTags.has(tag)); + + function openCreate() { + setSelectedTag(""); + setFormName(""); + setFormDescription(""); + setError(null); + setCreateOpen(true); + } + + function openEdit(group: GroupItem) { + setFormName(group.name ?? ""); + setFormDescription(group.description ?? ""); + setError(null); + setEditGroup(group); + } + + async function handleCreate() { + if (!selectedTag || selectedTag === UNREGISTERED_SENTINEL) { + setError(t("tagRequired")); + return; + } + setSubmitting(true); + setError(null); + try { + const result = await createUserGroup({ + tag: selectedTag, + name: formName.trim() || null, + description: formDescription.trim() || null, + }); + if (!result.ok) { + setError(result.error); + return; + } + setCreateOpen(false); + router.refresh(); + } finally { + setSubmitting(false); + } + } + + async function handleEdit() { + if (!editGroup) return; + setSubmitting(true); + setError(null); + try { + const result = await updateUserGroup(editGroup.id, { + name: formName.trim() || null, + description: formDescription.trim() || null, + }); + if (!result.ok) { + setError(result.error); + return; + } + setEditGroup(null); + router.refresh(); + } finally { + setSubmitting(false); + } + } + + async function handleDelete() { + if (!deleteGroup) return; + setSubmitting(true); + setError(null); + try { + const result = await deleteUserGroup(deleteGroup.id); + if (!result.ok) { + setError(result.error); + return; + } + setDeleteGroup(null); + router.refresh(); + } finally { + setSubmitting(false); + } + } + + return ( +
+
+

{t("totalCount", { count: groups.length })}

+ +
+ + {groups.length === 0 ? ( + + + +

{t("noGroups")}

+
+
+ ) : ( +
+ {groups.map((group) => ( + + +
+ {group.name || group.tag} + + {group.tag} + +
+
+ + +
+
+ + {group.description && ( +

{group.description}

+ )} +
+ + {group.memberCount} + {t("members")} +
+
+
+ ))} +
+ )} + + + + + {t("createDialog.title")} + {t("createDialog.description")} + +
+
+ + +
+
+ + setFormName(e.target.value)} + placeholder={t("form.namePlaceholder")} + /> +
+
+ +