diff --git a/internal/rating/oracle_test.go b/internal/rating/oracle_test.go index 7322006..4d3ef57 100644 --- a/internal/rating/oracle_test.go +++ b/internal/rating/oracle_test.go @@ -32,9 +32,12 @@ import ( type RatedEvent struct { AuthID string // ResourceID is the deployment id (E2 customer attribution — billing resolves the - // org via resource_id→org_id). Part of the rollup grain. Empty → unattributable - // (the row can't name its deployment/org), mirroring the SQL's resource_id-IS-NULL - // handling: counted, never billed. + // org via resource_id→org_id). Part of the rollup grain. Empty ("") MODELS A NULL + // resource_id column: the oracle has no separate NULL, so "" stands in for it and is + // counted unattributable (the row can't name its deployment/org), never billed. This + // mirrors the SQL's `resource_id IS NULL` handling exactly because production never + // lets a literal '' into the column — the drainer's nullStr writes ''→NULL and the + // proxy billing gate fails closed on empty ResourceID before metering. ResourceID string ModelID string // BaseModel is the HF base id a fine-tune derives from (E3), carried on the event diff --git a/internal/rating/rater_test.go b/internal/rating/rater_test.go index be75101..a1040fd 100644 --- a/internal/rating/rater_test.go +++ b/internal/rating/rater_test.go @@ -65,6 +65,15 @@ func (s *oracleStore) resolveWindow(start, end time.Time) (map[rollupKey]oracleR if e.At.Before(start) || !e.At.Before(end) { continue } + // EMPTY-STRING MODELS A NULL IDENTITY COLUMN. The Go oracle has no separate + // NULL, so "" stands in for a NULL auth_id/resource_id/model_id; production SQL + // instead filters `... IS NULL`. The two agree ONLY because of two production + // guarantees that ensure a literal '' never reaches billing_event: the drainer's + // nullStr maps ''→NULL on write (internal/drain/store.go) and the proxy billing + // gate fails closed on an empty ResourceID before metering. With '' impossible in + // the column, `== ""` here faithfully mirrors SQL's `IS NULL`, so the oracle's + // unattributable partition matches the rater's `resource_id IS NULL` count. Do not + // "fix" this to also test '' — '' is out of the modeled domain by construction. if e.AuthID == "" || e.ResourceID == "" || e.ModelID == "" { an.UnattributableEvents++ continue diff --git a/internal/rating/store_integration_test.go b/internal/rating/store_integration_test.go index 79db256..3a668a5 100644 --- a/internal/rating/store_integration_test.go +++ b/internal/rating/store_integration_test.go @@ -70,11 +70,11 @@ CREATE TABLE rated_usage ( ); -- Mirror the production indexes (migrations/0002_rating.sql): the auth-leading index --- for billing queries, the resource_id-leading index for E2 per-deployment reads, and --- the window_start-leading index the reconcile DELETE needs (it filters window_start --- alone; every auth/resource-leading index leaves it trailing). +-- for billing queries and the window_start-leading index the reconcile DELETE needs (it +-- filters window_start alone; every auth-leading index leaves it trailing). No standalone +-- (resource_id, window_start) index — production ships none until the E2 per-deployment +-- reader exists (see the migration's NOTE), so the fixture omits it too. CREATE INDEX rated_usage_auth_id_window_start_ix ON rated_usage (auth_id, window_start); -CREATE INDEX rated_usage_resource_id_window_start_ix ON rated_usage (resource_id, window_start); CREATE INDEX rated_usage_window_start_ix ON rated_usage (window_start);` // conformanceBook is the fixture price book shared by the conformance tests: base diff --git a/internal/rating/store_test.go b/internal/rating/store_test.go index a055d9b..98a71d6 100644 --- a/internal/rating/store_test.go +++ b/internal/rating/store_test.go @@ -115,7 +115,10 @@ func TestRateWindowSQL_Shape(t *testing.T) { "WHERE prompt_price IS NOT NULL", "AND auth_id IS NOT NULL", "AND model_id IS NOT NULL", - "AND resource_id IS NOT NULL", + // Anchor the grouped filter's resource_id guard to its GROUP BY (which uniquely + // follows it), so this pins the priced/grouped clause specifically — not the bare + // substring, which would also match the unpriced-count guard below. + "AND resource_id IS NOT NULL\n GROUP BY auth_id, resource_id, model_id", // session-TZ-independent hour bucket "date_trunc('hour', ev_ts AT TIME ZONE 'UTC') AT TIME ZONE 'UTC'", // deterministic natural-key surrogate id (re-runs regenerate the same id), @@ -141,10 +144,16 @@ func TestRateWindowSQL_Shape(t *testing.T) { "p.resource_id = ru.resource_id", "AS reconciled_deletions", // the anomaly counts ride the SAME statement (one snapshot) as the upsert; a NULL - // resource_id is counted UNATTRIBUTABLE (fail closed), never billed + // resource_id is counted UNATTRIBUTABLE (fail closed), never billed. + // PARTITION EXCLUSIVITY: the unpriced count must require FULL attribution + // (auth_id, resource_id, model_id all NON-NULL) so a NULL-resource_id unpriced row + // is counted ONLY as unattributable, never double-counted. Pin the whole + // contiguous WHERE so the resource_id guard is anchored to THIS count clause — a + // bare "AND resource_id IS NOT NULL" would also match the grouped/priced filter and + // wouldn't catch the guard being dropped from the unpriced count. + "WHERE prompt_price IS NULL\n AND auth_id IS NOT NULL\n AND resource_id IS NOT NULL\n AND model_id IS NOT NULL)", "AS unpriced_events", "OR resource_id IS NULL OR model_id IS NULL) AS unattributable_events", - "AND resource_id IS NOT NULL", // the E3 ft-uniqueness gate: an ft: rollup spanning >1 base_model is split out "COUNT(DISTINCT base_model) FILTER (WHERE via_derived) > 1 AS ambiguous_base", "WHERE NOT ambiguous_base", diff --git a/migrations/0002_rating.sql b/migrations/0002_rating.sql index 7ae4eeb..f348e93 100644 --- a/migrations/0002_rating.sql +++ b/migrations/0002_rating.sql @@ -104,12 +104,14 @@ CREATE TABLE rated_usage ( CREATE INDEX rated_usage_auth_id_window_start_ix ON rated_usage (auth_id, window_start); --- E2 customer attribution reads rated_usage by DEPLOYMENT over a time window (resolve --- the org via resource_id→org_id, then sum that deployment's cost). resource_id leads --- so a per-deployment time-range query is a tight index slice rather than a scan over --- an auth-leading index where resource_id is only a trailing column. -CREATE INDEX rated_usage_resource_id_window_start_ix - ON rated_usage (resource_id, window_start); +-- NOTE: rated_usage carries a resource_id column (part of the unique grain), but +-- this migration deliberately ships NO standalone (resource_id, window_start) index. +-- The only reader that would want it is the E2 per-deployment billing consumer +-- (resolve the org via resource_id→org_id, sum that deployment's cost), which does +-- not exist in this repo yet — it's the future Atlas/Stripe consumer. Adding the +-- index now would pay write-amplification on EVERY rated_usage upsert for an absent +-- reader; per "forward intent without speculative build" it lands in the PR that +-- adds the E2 reader, alongside an EXPLAIN that proves it's used. -- The reconcile DELETE (re-rate convergence, the `deleted` CTE in -- internal/rating/store.go) filters rated_usage on window_start ALONE diff --git a/migrations/atlas/c2f1a3b4d5e6_add_rating.py b/migrations/atlas/c2f1a3b4d5e6_add_rating.py index 7db004c..8323845 100644 --- a/migrations/atlas/c2f1a3b4d5e6_add_rating.py +++ b/migrations/atlas/c2f1a3b4d5e6_add_rating.py @@ -106,17 +106,15 @@ def upgrade(): unique=False, ) - # E2 customer attribution reads rated_usage by DEPLOYMENT over a time window - # (resolve the org via resource_id→org_id, then sum that deployment's cost). A - # resource_id-leading index makes a per-deployment time-range query a tight slice - # rather than a scan over the auth-leading index where resource_id only trails. - # Mirrors migrations/0002_rating.sql. - op.create_index( - "rated_usage_resource_id_window_start_ix", - "rated_usage", - ["resource_id", "window_start"], - unique=False, - ) + # NOTE: rated_usage carries a resource_id column (part of the unique grain), but + # this migration deliberately ships NO standalone (resource_id, window_start) index. + # The only reader that would want it is the E2 per-deployment billing consumer + # (resolve the org via resource_id→org_id, sum that deployment's cost), which does + # not exist in this repo yet — it's the future Atlas/Stripe consumer. Adding the + # index now would pay write-amplification on EVERY rated_usage upsert for an absent + # reader; per "forward intent without speculative build" it lands in the PR that + # adds the E2 reader, alongside an EXPLAIN that proves it's used. Mirrors + # migrations/0002_rating.sql. # The reconcile DELETE (re-rate convergence, the `deleted` CTE in # internal/rating/store.go) filters rated_usage on window_start ALONE @@ -178,11 +176,6 @@ def downgrade(): op.drop_index( "rated_usage_window_start_ix", table_name="rated_usage", if_exists=True ) - op.drop_index( - "rated_usage_resource_id_window_start_ix", - table_name="rated_usage", - if_exists=True, - ) op.drop_index( "rated_usage_auth_id_window_start_ix", table_name="rated_usage", if_exists=True )