diff --git a/CLAUDE.md b/CLAUDE.md
index 1393877..4d09128 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,6 +1,6 @@
# ai-api Development Guidelines
-Auto-generated from all feature plans. Last updated: 2026-06-12
+Auto-generated from all feature plans. Last updated: 2026-06-13
## Active Technologies
- Python 3.11+(同 Phase 1) (002-auth-membership)
@@ -68,6 +68,8 @@ Auto-generated from all feature plans. Last updated: 2026-06-12
- PostgreSQL(生產)/ SQLite(dev、CI);**不新增表/欄/migration**——沿用 0019 的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `image`/`query` 為字串值 (042-endpoint-registry)
- Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。 (043-realtime-transcription)
- PostgreSQL(生產)/ SQLite(dev、CI);**不新增表、不新增 migration**——沿用增量②(0019)的 `call_records.{quantity,unit}` 與 `price_list.{price_unit,price_per_unit_usd}`,新單位 `minute` 為字串值。 (043-realtime-transcription)
+- Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件** (046-cost-quota)
+- PostgreSQL(生產)/ SQLite(dev、CI);**新 migration `0020`**——`allocations` 加一個 nullable 欄 `quota_cost_usd_per_month`(純加欄)。累計來源沿用既有 `call_records.cost_usd`(0019 已有)。 (046-cost-quota)
- Python 3.11+ + LiteLLM(proxy core)、FastAPI(admin API)、 (001-gateway-core)
@@ -88,9 +90,9 @@ cd src [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECHNOLO
Python 3.11+: Follow standard conventions
## Recent Changes
+- 046-cost-quota: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI、SQLAlchemy 2.x async、Alembic、Pydantic v2(後端);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件**
- 043-realtime-transcription: Added Python 3.11+(後端為主)/ TypeScript strict + React 19(前端僅目錄顯示 realtime 類型 + 連線範例,極少量) + FastAPI(WebSocket — starlette 內建,**專案首次使用**)、SQLAlchemy 2.x async、Pydantic v2(皆既有);**`websockets`(直連 Azure realtime WS 的 async client,提為直接依賴——已隨 uvicorn/litellm 在 image,現宣告為直接依賴)**;既有 `proxy/preflight.py`、計費(`services/pricing.py` 的 `calculate_unit_cost`)、audit。**realtime 不經 litellm**(其 realtime 是 Proxy form / client 直連,違原則;借其 `RealTimeStreaming` 結構自寫薄 relay)。
- 042-endpoint-registry: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端少量範例) + FastAPI(含 `UploadFile` multipart,既有)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`amoderation`/`asearch`/`aimage_edit` 既有函式);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件**
-- 041-multi-endpoint-complete: Added Python 3.11+(後端)/ TypeScript strict + React 19 + Vite 6(前端) + FastAPI(含 `UploadFile` multipart)、SQLAlchemy 2.x async、Pydantic v2、`litellm`(`aimage_generation`/`arerank`/`aspeech`/`atranscription` library form);TanStack Query、shadcn/ui(前端)——**皆既有,不新增套件**
diff --git a/alembic/versions/0020_cost_quota.py b/alembic/versions/0020_cost_quota.py
new file mode 100644
index 0000000..d27920a
--- /dev/null
+++ b/alembic/versions/0020_cost_quota.py
@@ -0,0 +1,29 @@
+"""Phase 33 (046): cost-based monthly quota — per-allocation USD spend cap.
+
+Additive, nullable column (zero regression for token quota):
+ allocations: quota_cost_usd_per_month (NULL ⇒ no cost cap)
+Existing rows stay NULL and keep using quota_tokens_per_month unchanged.
+"""
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+
+from alembic import op
+
+revision: str = "0020_cost_quota"
+down_revision: str | Sequence[str] | None = "0019_billing_units"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:  # Add the nullable quota_cost_usd_per_month column to the allocations table.
+ op.add_column(
+ "allocations",
+ sa.Column("quota_cost_usd_per_month", sa.Numeric(10, 6), nullable=True),  # NOTE(review): Numeric(10, 6) leaves 4 integer digits (max 9999.999999) where precision is enforced; confirm this ceiling is enough for a monthly USD cap
+ )
+
+
+def downgrade() -> None:  # Reverse upgrade(): drop the column, discarding any cost caps stored in it.
+ op.drop_column("allocations", "quota_cost_usd_per_month")
diff --git a/frontend/src/components/allocation-list.tsx b/frontend/src/components/allocation-list.tsx
index 23de14c..f02ab61 100644
--- a/frontend/src/components/allocation-list.tsx
+++ b/frontend/src/components/allocation-list.tsx
@@ -34,6 +34,8 @@ interface Allocation {
revoked_at: string | null;
token_prefix: string;
quota_tokens_per_month?: number | null;
+ quota_cost_usd_per_month?: string | null;
+ cost_used_this_month?: string | null;
price?: { input_per_1k: string; output_per_1k: string; cached_input_per_1k?: string } | null;
}
@@ -225,6 +227,20 @@ export function AllocationList() {
配額:無上限
)
)}
+ {a.status === "active" && a.quota_cost_usd_per_month != null && (() => { // rendered only for active allocations that have a cost cap set
+ const used = Number(a.cost_used_this_month ?? 0); // a missing usage value is treated as 0
+ const cap = Number(a.quota_cost_usd_per_month);
+ const near = cap > 0 && used / cap >= 0.8; // flag once spend reaches 80% of the cap; the cap > 0 guard avoids dividing by zero
+ return (
+
+
+ 本月花費 ${used.toFixed(2)} / 上限 ${cap.toFixed(2)}
+ {near && "(接近上限)"}
+
+
+ );
+ })()}
現價(每 1M):
{a.price
diff --git a/frontend/src/routes/admin/allocations.tsx b/frontend/src/routes/admin/allocations.tsx
index 24165e0..c2364ec 100644
--- a/frontend/src/routes/admin/allocations.tsx
+++ b/frontend/src/routes/admin/allocations.tsx
@@ -49,6 +49,7 @@ interface AdminAllocation {
display_name?: string | null;
status: string;
quota_tokens_per_month: number | null;
+ quota_cost_usd_per_month: string | null;
is_service_allocation: boolean;
quota_locked: boolean;
token_prefix: string;
@@ -86,6 +87,7 @@ export function AdminAllocationsPage() {
const [showRevoked, setShowRevoked] = React.useState(false);
const [quotaTarget, setQuotaTarget] = React.useState
(null);
const [quotaValue, setQuotaValue] = React.useState("");
+ const [costValue, setCostValue] = React.useState("");
const allocsQuery = useQuery({
queryKey: ["admin", "allocations"],
@@ -257,6 +259,7 @@ export function AdminAllocationsPage() {
onClick={() => {
setQuotaTarget(a);
setQuotaValue(a.quota_tokens_per_month != null ? String(a.quota_tokens_per_month) : "");
+ setCostValue(a.quota_cost_usd_per_month != null ? String(Number(a.quota_cost_usd_per_month)) : "");
}}
>
調整配額
@@ -403,8 +406,9 @@ export function AdminAllocationsPage() {
調整月度配額
- 留空=無限額;否則填非負整數 tokens。
+ 兩種上限可同時設、任一達到即擋;留空=該項無上限。
+
請填非負整數,或留空表示無限額。
)}
+
+ setCostValue(e.target.value)}
+ />
+ {costValue.trim() !== "" && !(Number(costValue) >= 0) && (
+ 請填非負金額,或留空表示無上限。
+ )}
+
+ 花費上限以 USD 統一治理所有端點(token / 頁 / 張 / 秒 / 分…);只治理「已定價」的用量。
+