diff --git a/cmd/rater/main.go b/cmd/rater/main.go index e74d1d9..c770edc 100644 --- a/cmd/rater/main.go +++ b/cmd/rater/main.go @@ -208,7 +208,7 @@ func run() int { defer func() { _ = store.Close() }() r := rating.New(store, book, log) - res, err := r.Run(ctx, windowStart, windowEnd) + res, err := r.Run(ctx, windowStart, windowEnd, windowExplicit) if err != nil { log.Error.Printf("rater: run: %v", err) return exitFatal diff --git a/cmd/rater/main_test.go b/cmd/rater/main_test.go index 8bf304c..f4d21d2 100644 --- a/cmd/rater/main_test.go +++ b/cmd/rater/main_test.go @@ -108,14 +108,17 @@ func TestResolveWindow_BadFormat(t *testing.T) { } } -// TestRater_RoutineRunReconcileDeleteExitsNonzero pins the loud half of the -// reconcile-exit contract: a ROUTINE run (the default trailing-hours window — +// TestRater_RoutineRunReconcileDeleteExitsNonzero pins the EXIT-CODE half of the +// reconcile observability contract: a ROUTINE run (the default trailing-hours window — // windowExplicit == false) that DELETED a previously-billed rated_usage row // (ReconciledDeletions > 0) is a revenue change with no operator behind it, which // means data was lost / an upstream regression dropped events. It MUST exit nonzero -// (exitAnomaly) so a CronJob pages — even with NO other anomaly. RED before FIX 1: -// the old exit path keyed solely on HasAnomaly(), so a reconcile-delete with no -// other anomaly returned exitOK and the page never fired. +// (exitAnomaly) so a CronJob pages — even with NO other anomaly. This pins ONLY the +// exit code (exitCode()'s gate); the matching LOG-severity half (routine → ERROR) is +// pinned separately by TestRater_RoutineReconcileDeleteLogsError in internal/rating, +// where the reconcile-delete log line is emitted. RED before FIX 1's exit-code change: +// the old exit path keyed solely on HasAnomaly(), so a reconcile-delete with no other +// anomaly returned exitOK and the page never fired. func TestRater_RoutineRunReconcileDeleteExitsNonzero(t *testing.T) { const ( windowExplicit = false // routine default trailing-hours window diff --git a/internal/e2e/e2e_test.go b/internal/e2e/e2e_test.go index dbac15d..6c2a3c2 100644 --- a/internal/e2e/e2e_test.go +++ b/internal/e2e/e2e_test.go @@ -292,7 +292,7 @@ func (h *harness) rateEventHour(t *testing.T, book *rating.PriceBook) rating.Res hour := evTS.UTC().Truncate(time.Hour) rater := rating.New(rating.NewPostgresStore(h.db), book, h.log) - res, err := rater.Run(context.Background(), hour, hour.Add(time.Hour)) + res, err := rater.Run(context.Background(), hour, hour.Add(time.Hour), false) if err != nil { t.Fatalf("rater.Run: %v", err) } diff --git a/internal/rating/rater.go b/internal/rating/rater.go index 8b49c5a..643799e 100644 --- a/internal/rating/rater.go +++ b/internal/rating/rater.go @@ -93,7 +93,19 @@ func (r Result) HasAnomaly() bool { // re-run converges rated_usage to exactly the latest run's output — never // double-counts, and never leaves a superseded rollup billing at its stale cost. A // clean identical re-run is a no-op (same rows upserted, nothing deleted). See store.go. -func (r *Rater) Run(ctx context.Context, windowStart, windowEnd time.Time) (Result, error) { +// +// windowExplicit threads the routine-vs-backfill distinction into the rating-side +// observability so the LOG severity of a reconcile-delete matches the EXIT-code +// contract (option (c), see cmd/rater). On a ROUTINE run (default trailing-hours +// window, windowExplicit == false) a reconcile-delete rewrote a prior bill with no +// operator behind it — data vanished from billing_event or an upstream regression +// dropped events — so it is logged at ERROR (page). On an EXPLICIT backfill +// (--since/--until, windowExplicit == true) the same delete is intended convergence, +// logged at INFO. The reconcile SEMANTICS are identical either way; only the log +// severity (and, in cmd/rater, the exit code) turns on windowExplicit. The flag is +// passed in rather than recomputed here because only cmd/rater knows how the window +// was chosen, mirroring the exit-code gate. +func (r *Rater) Run(ctx context.Context, windowStart, windowEnd time.Time, windowExplicit bool) (Result, error) { windowStart = windowStart.UTC() windowEnd = windowEnd.UTC() res := Result{WindowStart: windowStart, WindowEnd: windowEnd} @@ -128,11 +140,25 @@ func (r *Rater) Run(ctx context.Context, windowStart, windowEnd time.Time) (Resu } // A re-rate that SUPERSEDES prior billing (deleted stale rollups) is significant — - // not an anomaly, but it changes a customer's bill, so surface it. 0 on a first run - // or a clean identical re-run. + // not a leaked-anomaly (it does not flip HasAnomaly), but it changes a customer's + // bill, so surface it. 0 on a first run or a clean identical re-run. The SEVERITY + // turns on windowExplicit (the routine-vs-backfill contract, option (c)): + // - ROUTINE (default trailing-hours window, !windowExplicit): no operator chose + // this window, so rewriting a prior bill is alarming — events vanished from + // billing_event (data loss) or an upstream regression dropped them (e.g. + // base_model stopped propagating → rollups went unpriced/ambiguous). ERROR: + // page/investigate. This is the loud half that matches cmd/rater's exit 2. + // - EXPLICIT backfill (--since/--until, windowExplicit): convergence is exactly + // what the operator asked for (e.g. a late price fix) — INFO, not a page. + // The deletion count + window appear in both; only the level and the wording differ. if res.ReconciledDeletions > 0 { - r.log.Info.Printf("rating: window [%s,%s) reconcile DELETED %d stale rollup(s) that prior runs billed but this run no longer produces (became ambiguous/unpriced, or their events vanished) — 'what the latest run says is what bills'", - windowStart.Format(time.RFC3339), windowEnd.Format(time.RFC3339), res.ReconciledDeletions) + if windowExplicit { + r.log.Info.Printf("rating: EXPLICIT backfill window [%s,%s) reconcile DELETED %d stale rollup(s) that prior runs billed but this run no longer produces (became ambiguous/unpriced, or their events vanished) — intended convergence, 'what the latest run says is what bills'", + windowStart.Format(time.RFC3339), windowEnd.Format(time.RFC3339), res.ReconciledDeletions) + } else { + r.log.Error.Printf("rating: ROUTINE window [%s,%s) reconcile DELETED %d previously-billed rollup(s) — a routine run REWROTE A PRIOR BILL with no operator behind it, meaning those events VANISHED from billing_event (data loss) or an upstream regression now drops them (e.g. base_model stopped propagating → rollups went unpriced/ambiguous). This is NOT a backfill; page/investigate (run with --since/--until only after confirming the deletion is intended)", + windowStart.Format(time.RFC3339), windowEnd.Format(time.RFC3339), res.ReconciledDeletions) + } } if res.HasAnomaly() { diff --git a/internal/rating/rater_test.go b/internal/rating/rater_test.go index c0f0925..00738b8 100644 --- a/internal/rating/rater_test.go +++ b/internal/rating/rater_test.go @@ -1,7 +1,9 @@ package rating import ( + "bytes" "context" + "strings" "testing" "time" @@ -170,7 +172,7 @@ func TestRater_MultiEventAggregation(t *testing.T) { store := newOracleStore(bookM(), events) r := New(store, bookM(), testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -204,11 +206,11 @@ func TestRater_IdempotentRerunNoDoubling(t *testing.T) { r := New(store, bookM(), testLogger()) ws, we := mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z") - res1, err := r.Run(context.Background(), ws, we) + res1, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } - res2, err := r.Run(context.Background(), ws, we) + res2, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -245,7 +247,7 @@ func TestRater_ReRateDeletesSupersededRollup(t *testing.T) { {AuthID: "a", ModelID: "ft:dupe", BaseModel: "cheap/base", PromptTokens: 1000, At: at}, }) r := New(store, book, testLogger()) - resA, err := r.Run(context.Background(), ws, we) + resA, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -261,7 +263,7 @@ func TestRater_ReRateDeletesSupersededRollup(t *testing.T) { // window → ambiguous. Run B excludes it from priced. store.events = append(store.events, RatedEvent{AuthID: "a", ModelID: "ft:dupe", BaseModel: "expensive/base", PromptTokens: 1000, At: at.Add(5 * time.Minute)}) - resB, err := r.Run(context.Background(), ws, we) + resB, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -301,7 +303,7 @@ func TestRater_ReRateSupersedesOneKeepsAnotherSameWindow(t *testing.T) { {AuthID: "a", ModelID: "ft:gone", BaseModel: "cheap/base", PromptTokens: 1000, At: at}, }) r := New(store, book, testLogger()) - if _, err := r.Run(context.Background(), ws, we); err != nil { + if _, err := r.Run(context.Background(), ws, we, false); err != nil { t.Fatal(err) } if len(store.table) != 2 { @@ -310,7 +312,7 @@ func TestRater_ReRateSupersedesOneKeepsAnotherSameWindow(t *testing.T) { // Run B: ft:gone gains a second base (ambiguous → superseded); ft:keep unchanged. store.events = append(store.events, RatedEvent{AuthID: "a", ModelID: "ft:gone", BaseModel: "expensive/base", PromptTokens: 1000, At: at.Add(5 * time.Minute)}) - resB, err := r.Run(context.Background(), ws, we) + resB, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -339,7 +341,7 @@ func TestRater_ReRateDeletesVanishedRollup(t *testing.T) { {AuthID: "a", ModelID: "m", PromptTokens: 100, CompletionTokens: 50, At: at}, }) r := New(store, bookM(), testLogger()) - if _, err := r.Run(context.Background(), ws, we); err != nil { + if _, err := r.Run(context.Background(), ws, we, false); err != nil { t.Fatal(err) } if len(store.table) != 1 { @@ -347,7 +349,7 @@ func TestRater_ReRateDeletesVanishedRollup(t *testing.T) { } // All events for the window vanish. store.events = nil - resB, err := r.Run(context.Background(), ws, we) + resB, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -370,11 +372,11 @@ func TestRater_ReRateIdenticalDataIsNoOp(t *testing.T) { }) r := New(store, bookM(), testLogger()) - res1, err := r.Run(context.Background(), ws, we) + res1, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } - res2, err := r.Run(context.Background(), ws, we) + res2, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -393,11 +395,11 @@ func TestRater_ReRateIdenticalDataIsNoOp(t *testing.T) { otherHour := mustTime("2026-06-08T12:00:00Z") store.events = append(store.events, RatedEvent{AuthID: "a", ModelID: "m", PromptTokens: 5, At: otherHour.Add(5 * time.Minute)}) - if _, err := r.Run(context.Background(), otherHour, otherHour.Add(time.Hour)); err != nil { + if _, err := r.Run(context.Background(), otherHour, otherHour.Add(time.Hour), false); err != nil { t.Fatal(err) } // Now re-rate ONLY the first window again: the 12:00 rollup must survive untouched. - resFirst, err := r.Run(context.Background(), ws, we) + resFirst, err := r.Run(context.Background(), ws, we, false) if err != nil { t.Fatal(err) } @@ -410,6 +412,111 @@ func TestRater_ReRateIdenticalDataIsNoOp(t *testing.T) { } } +// captureLogger builds a Logger whose INFO and ERROR streams are redirected to the +// returned buffers, so a test can assert WHICH severity a reconcile-delete fired at. +// It starts at DEBUG so neither stream is discarded by the level filter — the test +// is asserting the call site's chosen level, not the logger's threshold. +func captureLogger() (*logging.Logger, *bytes.Buffer, *bytes.Buffer) { + log := logging.New(logging.DEBUG) + var infoBuf, errBuf bytes.Buffer + log.Info.SetOutput(&infoBuf) + log.Error.SetOutput(&errBuf) + return log, &infoBuf, &errBuf +} + +// supersededReconcileStore returns an oracleStore whose first Run produces ONE clean +// rollup and whose second Run (after the events vanish) reconcile-DELETES it with NO +// other anomaly — the minimal reconcile-delete (HasAnomaly stays false), so a test can +// attribute the resulting log line purely to the reconcile path, not the anomaly path. +func supersededReconcileStore() (*oracleStore, time.Time, time.Time) { + at := mustTime("2026-06-08T10:15:00Z") + ws, we := mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z") + store := newOracleStore(bookM(), []RatedEvent{ + {AuthID: "a", ModelID: "m", PromptTokens: 100, CompletionTokens: 50, At: at}, + }) + return store, ws, we +} + +// TestRater_RoutineReconcileDeleteLogsError pins the LOUD half of the reconcile +// observability contract (option (c)): a ROUTINE run (windowExplicit == false) that +// reconcile-DELETES a previously-billed rollup must emit an ERROR line (page someone), +// even with NO other anomaly. RED before FIX 1: the old code logged the reconcile-delete +// UNCONDITIONALLY at INFO (testLogger discards INFO), so the ERROR stream was empty and +// this assertion failed — the page never fired on a routine bill rewrite. +func TestRater_RoutineReconcileDeleteLogsError(t *testing.T) { + store, ws, we := supersededReconcileStore() + log, infoBuf, errBuf := captureLogger() + r := New(store, bookM(), log) + + // Run A: one clean rollup, no reconcile-delete. + if _, err := r.Run(context.Background(), ws, we, false); err != nil { + t.Fatal(err) + } + infoBuf.Reset() + errBuf.Reset() + + // Run B (routine): the events vanish → the rollup is reconcile-deleted with no anomaly. + store.events = nil + resB, err := r.Run(context.Background(), ws, we, false) + if err != nil { + t.Fatal(err) + } + if resB.ReconciledDeletions != 1 { + t.Fatalf("setup: deletions=%d, want 1", resB.ReconciledDeletions) + } + if resB.HasAnomaly() { + t.Fatalf("setup: a clean reconcile-delete must NOT flip HasAnomaly (got %+v) — else the ERROR could come from the anomaly path, not the reconcile path", resB) + } + // THE INVARIANT: a routine reconcile-delete fires an ERROR (the page). + if errBuf.Len() == 0 { + t.Fatal("routine reconcile-delete emitted NO ERROR line — a prior bill was rewritten with no operator behind it (data loss / upstream regression); it MUST log at ERROR so a CronJob pages") + } + if !strings.Contains(errBuf.String(), "ROUTINE") || !strings.Contains(errBuf.String(), "reconcile") { + t.Fatalf("routine reconcile-delete ERROR line lacks the routine-rewrite wording: %q", errBuf.String()) + } + // The deletion count and window must be present in the loud line. + if !strings.Contains(errBuf.String(), "[2026-06-08T10:00:00Z,2026-06-08T11:00:00Z)") { + t.Fatalf("ERROR line is missing the window: %q", errBuf.String()) + } +} + +// TestRater_BackfillReconcileDeleteLogsInfoNoError pins the QUIET half: an EXPLICIT +// operator backfill (windowExplicit == true) that reconcile-deletes the SAME rollup is +// intended convergence — it logs at INFO and emits NO ERROR (no page). The flag flips +// ONLY the reconcile-delete severity; identical data, identical reconcile, opposite level. +func TestRater_BackfillReconcileDeleteLogsInfoNoError(t *testing.T) { + store, ws, we := supersededReconcileStore() + log, infoBuf, errBuf := captureLogger() + r := New(store, bookM(), log) + + // Run A (explicit backfill): one clean rollup. + if _, err := r.Run(context.Background(), ws, we, true); err != nil { + t.Fatal(err) + } + infoBuf.Reset() + errBuf.Reset() + + // Run B (explicit backfill): the events vanish → reconcile-delete, but operator-chosen. + store.events = nil + resB, err := r.Run(context.Background(), ws, we, true) + if err != nil { + t.Fatal(err) + } + if resB.ReconciledDeletions != 1 { + t.Fatalf("setup: deletions=%d, want 1", resB.ReconciledDeletions) + } + // THE INVARIANT: a backfill reconcile-delete is INFO, never ERROR. + if errBuf.Len() != 0 { + t.Fatalf("explicit backfill reconcile-delete emitted an ERROR line (it must be INFO — the operator asked for this convergence): %q", errBuf.String()) + } + if !strings.Contains(infoBuf.String(), "reconcile DELETED") { + t.Fatalf("explicit backfill reconcile-delete did not log the convergence INFO line: %q", infoBuf.String()) + } + if !strings.Contains(infoBuf.String(), "[2026-06-08T10:00:00Z,2026-06-08T11:00:00Z)") { + t.Fatalf("INFO line is missing the window: %q", infoBuf.String()) + } +} + // TestRater_LateArrivalRatedByTrailingWindow: an event whose event_ts falls in hour // H but which is DRAINED only after H was rated is picked up by a later run whose // window still covers H — the trailing-window contract. The re-rate REPLACES H's @@ -419,7 +526,7 @@ func TestRater_LateArrivalRatedByTrailingWindow(t *testing.T) { store := newOracleStore(bookM(), nil) r := New(store, bookM(), testLogger()) - res1, err := r.Run(context.Background(), hourH, hourH.Add(time.Hour)) + res1, err := r.Run(context.Background(), hourH, hourH.Add(time.Hour), false) if err != nil { t.Fatal(err) } @@ -431,7 +538,7 @@ func TestRater_LateArrivalRatedByTrailingWindow(t *testing.T) { AuthID: "a", ModelID: "m", PromptTokens: 100, CompletionTokens: 50, At: hourH.Add(15 * time.Minute), }) - res2, err := r.Run(context.Background(), hourH.Add(-23*time.Hour), hourH.Add(2*time.Hour)) + res2, err := r.Run(context.Background(), hourH.Add(-23*time.Hour), hourH.Add(2*time.Hour), false) if err != nil { t.Fatal(err) } @@ -455,7 +562,7 @@ func TestRater_MissingPriceFailsLoudNotZero(t *testing.T) { } store := newOracleStore(bookM(), events) r := New(store, bookM(), testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -486,7 +593,7 @@ func TestRater_UnattributableCountedNotSilent(t *testing.T) { } store := newOracleStore(bookM(), events) r := New(store, bookM(), testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -514,7 +621,7 @@ func TestRater_DerivedFineTuneRatedViaPolicy(t *testing.T) { } store := newOracleStore(book, events) r := New(store, book, testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -544,7 +651,7 @@ func TestRater_FineTunePricesViaBaseModelOnEvent(t *testing.T) { }} store := newOracleStore(book, events) r := New(store, book, testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -583,7 +690,7 @@ func TestRater_FineTuneWithoutBaseModelFailsLoud(t *testing.T) { }} store := newOracleStore(book, events) r := New(store, book, testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -628,7 +735,7 @@ func TestRater_FineTuneAmbiguousBaseModelFailsLoud(t *testing.T) { } store := newOracleStore(book, events) r := New(store, book, testLogger()) - res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")) + res, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false) if err != nil { t.Fatal(err) } @@ -686,7 +793,7 @@ func TestRater_RollupCostSelfAudits(t *testing.T) { } store := newOracleStore(book, events) r := New(store, book, testLogger()) - if _, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")); err != nil { + if _, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false); err != nil { t.Fatal(err) } if len(store.table) == 0 { @@ -719,7 +826,7 @@ func TestRater_AppliedRateStoredOnRow(t *testing.T) { } store := newOracleStore(book, events) r := New(store, book, testLogger()) - if _, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z")); err != nil { + if _, err := r.Run(context.Background(), mustTime("2026-06-08T10:00:00Z"), mustTime("2026-06-08T11:00:00Z"), false); err != nil { t.Fatal(err) } hour := mustTime("2026-06-08T10:00:00Z") @@ -738,7 +845,7 @@ func TestRater_AppliedRateStoredOnRow(t *testing.T) { func TestRater_InvertedWindow(t *testing.T) { store := newOracleStore(bookM(), nil) r := New(store, bookM(), testLogger()) - if _, err := r.Run(context.Background(), mustTime("2026-06-08T11:00:00Z"), mustTime("2026-06-08T10:00:00Z")); err == nil { + if _, err := r.Run(context.Background(), mustTime("2026-06-08T11:00:00Z"), mustTime("2026-06-08T10:00:00Z"), false); err == nil { t.Fatal("expected error for inverted window") } } @@ -768,7 +875,7 @@ func TestOracleModel_SelfConsistent(t *testing.T) { } store := newOracleStore(book, events) r := New(store, book, testLogger()) - res, err := r.Run(context.Background(), hour, hour.Add(time.Hour)) + res, err := r.Run(context.Background(), hour, hour.Add(time.Hour), false) if err != nil { t.Fatal(err) } diff --git a/internal/rating/store_integration_test.go b/internal/rating/store_integration_test.go index 9435dac..6392ef7 100644 --- a/internal/rating/store_integration_test.go +++ b/internal/rating/store_integration_test.go @@ -963,9 +963,22 @@ func TestIntegration_RatingInstantIndexServesScan(t *testing.T) { // with auth_id, leaving window_start trailing → no index can serve a // window_start-only range scan, so without rated_usage_window_start_ix the reconcile // seq-scans rated_usage (and takes a full-trailing-window lock footprint) on every -// run. This pins that the window_start-leading index exists AND the planner CAN use -// it for the reconcile's exact predicate (seqscan disabled, so a remaining seq scan -// would mean no usable index). +// run. This pins that the window_start-leading index exists AND the planner uses it +// for the reconcile's EXACT predicate. +// +// REAL PROOF, not vacuous: an earlier version EXPLAINed a `SELECT 1` against a +// freshly-created EMPTY rated_usage. On an empty (or tiny) table the planner +// seqscans regardless of any index — and the standalone SELECT does not even +// resemble the reconcile DELETE — so a passing assertion proved nothing about the +// actual statement. Here we (a) POPULATE rated_usage with many rows across many +// hours so a narrow window-slice is genuinely cheaper by index than a seqscan, then +// (b) EXPLAIN the ACTUAL reconcile DELETE from store.go (the `deleted` CTE's +// rated_usage table-access: the window_start range plus the NOT EXISTS anti-join +// against this run's priced rows) and assert it scans rated_usage via +// rated_usage_window_start_ix. We assert the index is chosen by DEFAULT cost +// (seqscan ENABLED) — that is the load-bearing proof the index PAYS OFF for the +// reconcile's predicate, not just that one CAN be forced. A seqscan-disabled pass +// follows as a belt-and-braces check that the index can serve the predicate at all. func TestIntegration_ReconcileDeleteUsesWindowStartIndex(t *testing.T) { dsn := os.Getenv("PHOEBE_TEST_DATABASE_URL") if dsn == "" { @@ -985,14 +998,75 @@ func TestIntegration_ReconcileDeleteUsesWindowStartIndex(t *testing.T) { defer func() { exec(t, db, "DROP SCHEMA IF EXISTS "+sch+" CASCADE") }() exec(t, db, schemaDDL) - // EXPLAIN the reconcile DELETE's table-access predicate (window_start range - // alone — the `deleted` CTE's filter on rated_usage). enable_seqscan=off forces - // the planner to use an index if one CAN serve it; a remaining Seq Scan means no - // usable index (the bug this index fixes). + // POPULATE: 50 distinct auth_ids × 200 hours = 10k rated_usage rows spread across + // a wide window_start range. The reconcile DELETE targets a SINGLE hour, so the + // index-served slice is ~50 rows out of 10k — a range the planner should prefer + // the window_start index for over a full seqscan once it has stats. + exec(t, db, `INSERT INTO rated_usage + (id, auth_id, model_id, window_start, window_end, + prompt_tokens, cached_tokens, completion_tokens, billable_prompt_tokens, + cost, applied_prompt_rate, applied_cached_rate, applied_completion_rate, event_count) + SELECT + md5(a::text || ':' || h::text), + 'auth' || a::text, + 'm', + '2026-01-01T00:00:00Z'::timestamptz + (h || ' hours')::interval, + '2026-01-01T01:00:00Z'::timestamptz + (h || ' hours')::interval, + 100, 0, 0, 100, 0.001, 0.00001, 0, 0, 1 + FROM generate_series(0, 49) AS a, generate_series(0, 199) AS h`) + // ANALYZE so the planner has row-count + distribution stats (without it the + // estimates are defaults and the choice is not a real cost decision). + exec(t, db, "ANALYZE rated_usage") + + // EXPLAIN the ACTUAL reconcile DELETE statement (store.go's `deleted` CTE shape): + // the rated_usage range on window_start, anti-joined against this run's priced + // rows. `priced` is empty here (a re-run that reproduces nothing for this hour → + // the whole slice is deleted), which is exactly the worst-case reconcile that + // must still go through the index rather than seqscanning the whole table. + const reconcileDelete = `EXPLAIN + WITH priced AS ( + SELECT auth_id, model_id, window_start FROM rated_usage WHERE false + ) + DELETE FROM rated_usage ru + WHERE ru.window_start >= '2026-01-05T04:00:00Z' + AND ru.window_start < '2026-01-05T05:00:00Z' + AND NOT EXISTS ( + SELECT 1 FROM priced p + WHERE p.auth_id = ru.auth_id + AND p.model_id = ru.model_id + AND p.window_start = ru.window_start + )` + + // (1) Default-cost plan (seqscan ENABLED): the LOAD-BEARING proof the index PAYS + // OFF — the planner chooses rated_usage_window_start_ix on cost, not because + // seqscan was forbidden. This is discriminating: verified empirically, dropping + // the index makes the planner fall back to the auth-leading composite + // (auth_id, window_start) as a far costlier full-index scan (~280 vs ~8), so this + // assertion FAILS without the window_start index — it is not vacuous. (The + // fallback is an index scan, not a literal Seq Scan, because the composite still + // covers window_start as a non-leading column; the point stands — the dedicated + // window_start index is what the reconcile's window-only predicate should use.) + plan := explainPlan(t, db, reconcileDelete) + if !strings.Contains(plan, "rated_usage_window_start_ix") { + t.Fatalf("reconcile DELETE did not choose rated_usage_window_start_ix at DEFAULT cost on a populated table (a window_start-only slice should prefer the dedicated index):\n%s", plan) + } + if strings.Contains(plan, "Seq Scan on rated_usage ru") { + t.Fatalf("reconcile DELETE seq-scans rated_usage at default cost — the window_start index is not serving the range predicate:\n%s", plan) + } + + // (2) Belt-and-braces: with seqscan disabled the index must STILL be able to serve + // the predicate (a remaining seqscan would mean no usable index at all). exec(t, db, "SET enable_seqscan = off") - rows, err := db.Query(`EXPLAIN SELECT 1 FROM rated_usage - WHERE window_start >= '2026-06-08T10:00:00Z' - AND window_start < '2026-06-08T11:00:00Z'`) + plan2 := explainPlan(t, db, reconcileDelete) + if !strings.Contains(plan2, "rated_usage_window_start_ix") { + t.Fatalf("reconcile DELETE predicate cannot be served by rated_usage_window_start_ix even with seqscan off:\n%s", plan2) + } +} + +// explainPlan runs an EXPLAIN query and returns the joined plan text. +func explainPlan(t *testing.T, db *sql.DB, explainQuery string) string { + t.Helper() + rows, err := db.Query(explainQuery) if err != nil { t.Fatalf("explain: %v", err) } @@ -1008,9 +1082,7 @@ func TestIntegration_ReconcileDeleteUsesWindowStartIndex(t *testing.T) { if err := rows.Err(); err != nil { t.Fatalf("rows: %v", err) } - if !strings.Contains(plan, "rated_usage_window_start_ix") { - t.Fatalf("reconcile DELETE predicate does not use rated_usage_window_start_ix (window_start-only scan would seq-scan):\n%s", plan) - } + return plan } func exec(t *testing.T, db *sql.DB, q string) {