From a54437e6bbe8ff77c3972dd77b61af4dc1305e87 Mon Sep 17 00:00:00 2001 From: timcsy Date: Sat, 13 Jun 2026 14:55:10 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat(quota):=20cost-based=20monthly=20quota?= =?UTF-8?q?=20=E2=80=94=20per-allocation=20USD=20spend=20cap=20(Phase=2033?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the governance gap where non-token endpoints (OCR/image/TTS/realtime/…) bypassed the token-only monthly quota entirely. Adds an optional per-allocation monthly USD cap that governs EVERY endpoint by using cost as the common denominator. - model: allocations.quota_cost_usd_per_month (migration 0020, additive nullable); CallOutcome.rejected_cost_quota_exceeded (VARCHAR enum, no migration). - quota service: current_month_cost (sum cost_usd, hits the existing index) + is_over_cost_quota; mirrors the token quota helpers. Unpriced (cost_usd NULL) calls coalesce to 0 → not counted, not blocked (honest; admin must price them). - preflight: cost check alongside the token check — either cap trips. Rejected calls are recorded with the new outcome (attributed to the allocation). - realtime: in-connection cost watch extends the revocation watcher — committed month cost + this connection's in-flight cost ≥ cap → close + bill accrued time (any close path bills, FR-004). New close_reason "cost_exceeded". - admin: create/patch accept quota_cost_usd_per_month (>=0, null clears); patch audits allocation_cost_quota_updated (FR-008). Frontend quota dialog gains the cost-cap field. - members: /me/allocations exposes quota_cost_usd_per_month + cost_used_this_month; the allocation card shows "本月花費 / 上限" with a near-cap warning. - adaptive pool isolation: the rebalance pool only touches token quota, so the cost cap is untouched — locked in by an integration test (SC-005). 
Fix: the 0015 migration-replay test seeded an allocation via the HEAD-schema ORM; switched to raw SQL so a later column (0020) doesn't break it (lesson learned). Full suite 759 passed (zero regression); ruff+mypy clean; frontend tsc + 164 vitest + build green. Existing quota contract tests untouched (SC-003). Co-Authored-By: Claude Opus 4.8 (1M context) --- CLAUDE.md | 6 +- alembic/versions/0020_cost_quota.py | 29 +++ frontend/src/components/allocation-list.tsx | 16 ++ frontend/src/routes/admin/allocations.tsx | 34 +++- .../046-cost-quota/checklists/requirements.md | 40 +++++ specs/046-cost-quota/contracts/cost-quota.md | 69 ++++++++ specs/046-cost-quota/data-model.md | 44 +++++ specs/046-cost-quota/plan.md | 80 +++++++++ specs/046-cost-quota/quickstart.md | 29 +++ specs/046-cost-quota/research.md | 96 ++++++++++ specs/046-cost-quota/spec.md | 109 ++++++++++++ specs/046-cost-quota/tasks.md | 147 ++++++++++++++++ src/ai_api/api/allocations.py | 33 +++- src/ai_api/api/me.py | 10 ++ src/ai_api/api/schemas.py | 5 + src/ai_api/models/allocation.py | 8 +- src/ai_api/models/auth_audit.py | 2 + src/ai_api/models/call_record.py | 1 + src/ai_api/proxy/preflight.py | 22 ++- src/ai_api/proxy/realtime.py | 77 +++++++- src/ai_api/proxy/router.py | 1 + src/ai_api/services/allocations.py | 3 + src/ai_api/services/quota.py | 25 +++ tests/contract/test_cost_quota.py | 165 ++++++++++++++++++ tests/contract/test_me_allocations.py | 32 ++++ tests/contract/test_realtime_transcription.py | 33 ++++ .../integration/test_credential_migration.py | 48 +++-- .../integration/test_quota_pool_rebalance.py | 33 ++++ tests/unit/test_quota_check.py | 30 +++- 29 files changed, 1182 insertions(+), 45 deletions(-) create mode 100644 alembic/versions/0020_cost_quota.py create mode 100644 specs/046-cost-quota/checklists/requirements.md create mode 100644 specs/046-cost-quota/contracts/cost-quota.md create mode 100644 specs/046-cost-quota/data-model.md create mode 100644 specs/046-cost-quota/plan.md 
create mode 100644 specs/046-cost-quota/quickstart.md create mode 100644 specs/046-cost-quota/research.md create mode 100644 specs/046-cost-quota/spec.md create mode 100644 specs/046-cost-quota/tasks.md create mode 100644 tests/contract/test_cost_quota.py diff --git a/CLAUDE.md b/CLAUDE.md index 1393877..4d09128 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # ai-api Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-06-12 +Auto-generated from all feature plans. Last updated: 2026-06-13 ## Active Technologies - Python 3.11+(同 Phase 1) (002-auth-membership) @@ -68,6 +68,8 @@ Auto-generated from all feature plans. Last updated: 2026-06-12 - PostgreSQL(生產)/ SQLite(dev、CI);**不新增表/欄/migration**——沿用 0019 的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `image`/`query` 為字串值 (042-endpoint-registry) - Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。 (043-realtime-transcription) - PostgreSQL(生產)/ SQLite(dev、CI);**不新增表、不新增 migration**——沿用增量②(0019)的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `minute` 為字串值。 (043-realtime-transcription) +- Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** (046-cost-quota) +- PostgreSQL(生產)/ SQLite(dev、CI);**新 migration `0020`**——`allocations` 加一個 nullable 欄 `quota_cost_usd_per_month`(純加欄)。累計來源沿用既有 `call_records.cost_usd`(0019 已有)。 (046-cost-quota) - Python 3.11+ + LiteLLM(proxy core)、FastAPI(admin API)、 (001-gateway-core) @@ -88,9 +90,9 @@ 
cd src [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECHNOLO Python 3.11+: Follow standard conventions ## Recent Changes +- 046-cost-quota: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** - 043-realtime-transcription: Added Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。 - 042-endpoint-registry: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端少量範例) + FastAPI(含 `UploadFile` multipart,既有)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`amoderation`/`asearch`/`aimage_edit` 既有函式);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** -- 041-multi-endpoint-complete: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI(含 `UploadFile` multipart)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`aimage_generation`/`arerank`/`aspeech`/`atranscription` library form);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** diff --git a/alembic/versions/0020_cost_quota.py b/alembic/versions/0020_cost_quota.py new file mode 100644 index 0000000..d27920a --- /dev/null +++ b/alembic/versions/0020_cost_quota.py @@ -0,0 +1,29 @@ +"""Phase 33 (046): cost-based monthly quota — per-allocation USD spend cap. + +Additive, nullable column (zero regression for token quota): + allocations: quota_cost_usd_per_month (NULL ⇒ no cost cap) +Existing rows stay NULL and keep using quota_tokens_per_month unchanged. 
+""" +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0020_cost_quota" +down_revision: str | Sequence[str] | None = "0019_billing_units" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "allocations", + sa.Column("quota_cost_usd_per_month", sa.Numeric(10, 6), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("allocations", "quota_cost_usd_per_month") diff --git a/frontend/src/components/allocation-list.tsx b/frontend/src/components/allocation-list.tsx index 23de14c..f02ab61 100644 --- a/frontend/src/components/allocation-list.tsx +++ b/frontend/src/components/allocation-list.tsx @@ -34,6 +34,8 @@ interface Allocation { revoked_at: string | null; token_prefix: string; quota_tokens_per_month?: number | null; + quota_cost_usd_per_month?: string | null; + cost_used_this_month?: string | null; price?: { input_per_1k: string; output_per_1k: string; cached_input_per_1k?: string } | null; } @@ -225,6 +227,20 @@ export function AllocationList() {
配額:無上限
) )} + {a.status === "active" && a.quota_cost_usd_per_month != null && (() => { + const used = Number(a.cost_used_this_month ?? 0); + const cap = Number(a.quota_cost_usd_per_month); + const near = cap > 0 && used / cap >= 0.8; + return ( +
+
+ 本月花費 ${used.toFixed(2)} / 上限 ${cap.toFixed(2)} + {near && "(接近上限)"} +
+ 0 ? Math.min(100, Math.round((used / cap) * 100)) : 0} /> +
+ ); + })()}
現價(每 1M): {a.price diff --git a/frontend/src/routes/admin/allocations.tsx b/frontend/src/routes/admin/allocations.tsx index 24165e0..c2364ec 100644 --- a/frontend/src/routes/admin/allocations.tsx +++ b/frontend/src/routes/admin/allocations.tsx @@ -49,6 +49,7 @@ interface AdminAllocation { display_name?: string | null; status: string; quota_tokens_per_month: number | null; + quota_cost_usd_per_month: string | null; is_service_allocation: boolean; quota_locked: boolean; token_prefix: string; @@ -86,6 +87,7 @@ export function AdminAllocationsPage() { const [showRevoked, setShowRevoked] = React.useState(false); const [quotaTarget, setQuotaTarget] = React.useState(null); const [quotaValue, setQuotaValue] = React.useState(""); + const [costValue, setCostValue] = React.useState(""); const allocsQuery = useQuery({ queryKey: ["admin", "allocations"], @@ -257,6 +259,7 @@ export function AdminAllocationsPage() { onClick={() => { setQuotaTarget(a); setQuotaValue(a.quota_tokens_per_month != null ? String(a.quota_tokens_per_month) : ""); + setCostValue(a.quota_cost_usd_per_month != null ? String(Number(a.quota_cost_usd_per_month)) : ""); }} > 調整配額 @@ -403,8 +406,9 @@ export function AdminAllocationsPage() { 調整月度配額 - 留空=無限額;否則填非負整數 tokens。 + 兩種上限可同時設、任一達到即擋;留空=該項無上限。 + 請填非負整數,或留空表示無限額。

)} + + setCostValue(e.target.value)} + /> + {costValue.trim() !== "" && !(Number(costValue) >= 0) && ( +

請填非負金額,或留空表示無上限。

+ )} +

+ 花費上限以 USD 統一治理所有端點(token / 頁 / 張 / 秒 / 分…);只治理「已定價」的用量。 +