diff --git a/CLAUDE.md b/CLAUDE.md index 1393877..4d09128 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,6 +1,6 @@ # ai-api Development Guidelines -Auto-generated from all feature plans. Last updated: 2026-06-12 +Auto-generated from all feature plans. Last updated: 2026-06-13 ## Active Technologies - Python 3.11+(同 Phase 1) (002-auth-membership) @@ -68,6 +68,8 @@ Auto-generated from all feature plans. Last updated: 2026-06-12 - PostgreSQL(生產)/ SQLite(dev、CI);**不新增表/欄/migration**——沿用 0019 的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `image`/`query` 為字串值 (042-endpoint-registry) - Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。 (043-realtime-transcription) - PostgreSQL(生產)/ SQLite(dev、CI);**不新增表、不新增 migration**——沿用增量②(0019)的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `minute` 為字串值。 (043-realtime-transcription) +- Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** (046-cost-quota) +- PostgreSQL(生產)/ SQLite(dev、CI);**新 migration `0020`**——`allocations` 加一個 nullable 欄 `quota_cost_usd_per_month`(純加欄)。累計來源沿用既有 `call_records.cost_usd`(0019 已有)。 (046-cost-quota) - Python 3.11+ + LiteLLM(proxy core)、FastAPI(admin API)、 (001-gateway-core) @@ -88,9 +90,9 @@ cd src [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECHNOLO Python 3.11+: Follow standard conventions ## Recent Changes +- 046-cost-quota: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** - 043-realtime-transcription: Added Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。 - 042-endpoint-registry: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端少量範例) + FastAPI(含 `UploadFile` multipart,既有)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`amoderation`/`asearch`/`aimage_edit` 既有函式);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** -- 041-multi-endpoint-complete: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI(含 `UploadFile` multipart)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`aimage_generation`/`arerank`/`aspeech`/`atranscription` library form);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** diff --git a/alembic/versions/0020_cost_quota.py b/alembic/versions/0020_cost_quota.py new file mode 100644 index 0000000..d27920a --- /dev/null +++ b/alembic/versions/0020_cost_quota.py @@ -0,0 +1,29 @@ +"""Phase 33 (046): cost-based monthly quota — per-allocation USD spend cap. + +Additive, nullable column (zero regression for token quota): + allocations: quota_cost_usd_per_month (NULL ⇒ no cost cap) +Existing rows stay NULL and keep using quota_tokens_per_month unchanged. +""" +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0020_cost_quota" +down_revision: str | Sequence[str] | None = "0019_billing_units" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "allocations", + sa.Column("quota_cost_usd_per_month", sa.Numeric(10, 6), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("allocations", "quota_cost_usd_per_month") diff --git a/frontend/src/components/allocation-list.tsx b/frontend/src/components/allocation-list.tsx index 23de14c..f02ab61 100644 --- a/frontend/src/components/allocation-list.tsx +++ b/frontend/src/components/allocation-list.tsx @@ -34,6 +34,8 @@ interface Allocation { revoked_at: string | null; token_prefix: string; quota_tokens_per_month?: number | null; + quota_cost_usd_per_month?: string | null; + cost_used_this_month?: string | null; price?: { input_per_1k: string; output_per_1k: string; cached_input_per_1k?: string } | null; } @@ -225,6 +227,20 @@ export function AllocationList() {
配額:無上限
) )} + {a.status === "active" && a.quota_cost_usd_per_month != null && (() => { + const used = Number(a.cost_used_this_month ?? 0); + const cap = Number(a.quota_cost_usd_per_month); + const near = cap > 0 && used / cap >= 0.8; + return ( +
+
+ 本月花費 ${used.toFixed(2)} / 上限 ${cap.toFixed(2)} + {near && "(接近上限)"} +
+ 0 ? Math.min(100, Math.round((used / cap) * 100)) : 0} /> +
+ ); + })()}
現價(每 1M): {a.price diff --git a/frontend/src/routes/admin/allocations.tsx b/frontend/src/routes/admin/allocations.tsx index 24165e0..c2364ec 100644 --- a/frontend/src/routes/admin/allocations.tsx +++ b/frontend/src/routes/admin/allocations.tsx @@ -49,6 +49,7 @@ interface AdminAllocation { display_name?: string | null; status: string; quota_tokens_per_month: number | null; + quota_cost_usd_per_month: string | null; is_service_allocation: boolean; quota_locked: boolean; token_prefix: string; @@ -86,6 +87,7 @@ export function AdminAllocationsPage() { const [showRevoked, setShowRevoked] = React.useState(false); const [quotaTarget, setQuotaTarget] = React.useState(null); const [quotaValue, setQuotaValue] = React.useState(""); + const [costValue, setCostValue] = React.useState(""); const allocsQuery = useQuery({ queryKey: ["admin", "allocations"], @@ -257,6 +259,7 @@ export function AdminAllocationsPage() { onClick={() => { setQuotaTarget(a); setQuotaValue(a.quota_tokens_per_month != null ? String(a.quota_tokens_per_month) : ""); + setCostValue(a.quota_cost_usd_per_month != null ? String(Number(a.quota_cost_usd_per_month)) : ""); }} > 調整配額 @@ -403,8 +406,9 @@ export function AdminAllocationsPage() { 調整月度配額 - 留空=無限額;否則填非負整數 tokens。 + 兩種上限可同時設、任一達到即擋;留空=該項無上限。 + 請填非負整數,或留空表示無限額。

)} + + setCostValue(e.target.value)} + /> + {costValue.trim() !== "" && !(Number(costValue) >= 0) && ( +

請填非負金額,或留空表示無上限。

+ )} +

+ 花費上限以 USD 統一治理所有端點(token / 頁 / 張 / 秒 / 分…);只治理「已定價」的用量。 +