HomenShum · HomenShum · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · augmentcode
@@ -1059,6 +1059,124 @@ describe("composeAnswer — production hardening (idempotency / integrity / cach
   });
 });
 
+/* ========================================================================== */
+/* Test 1c — getAskTelemetry: /ask operability aggregate (PR C)                */
+/* mode mix · PROVIDER FAILURE RATE · quality · cost · latency · HONEST_SCORES */
+/* ========================================================================== */
+
+describe("getAskTelemetry — /ask operability aggregate", () => {
+  // Resolved off the events module; each test below invokes its `_handler` directly.
+  const getAskTelemetry = (eventsModule as any).getAskTelemetry;
+
+  /**
+   * Builds one answer fixture row attached to event "liveEvents:1".
+   *
+   * Defaults: score 100, passed true, cost 0, no provider latency, no live
+   * searches. A non-null `providerMs` yields a single ok `provider_llm` trace
+   * step with that duration; otherwise the trace is a single ok
+   * `deterministic_synthesis` step of 1 ms. `cacheHit` is true only for
+   * agentMode "cache".
+   */
+  function telAnswer(
+    id: string,
+    agentMode: string,
+    {
+      score = 100,
+      passed = true,
+      costCents = 0,
+      providerMs = null,
+      liveSearches = 0,
+      createdAt,
+    }: {
+      score?: number;
+      passed?: boolean;
+      costCents?: number;
+      providerMs?: number | null;
+      liveSearches?: number;
+      createdAt: number;
+    },
+  ): TableRecord {
+    const okStep = (step: string, durationMs: number) => ({ step, status: "ok", detail: "", durationMs });
+    return {
+      _id: id,
+      eventId: "liveEvents:1",
+      questionMessageId: "liveEventMessages:1",
+      askedBySessionId: ANONYMOUS_SESSION_A,
+      question: "q",
+      normalizedQuestion: "q",
+      body: "b",
+      sourceIds: [],
+      trace: [providerMs !== null ? okStep("provider_llm", providerMs) : okStep("deterministic_synthesis", 1)],
+      cacheHit: agentMode === "cache",
+      agentMode,
+      estimatedCostCents: costCents,
+      externalSearches: liveSearches,
+      evaluation: { passed, score, checks: [] },
+      faqStatus: "none",
+      createdAt,
+    };
+  }
+
+  /**
+   * Scenario:    Mid-event, a host pulls up the room's /ask health view.
+   * User:        Host / operator.
+   * Goal:        Read the live mode mix, provider failure rate, cost, and quality.
+   * Prior state: 7 answers — 3 provider (one of them with a live search),
+   *              1 provider_fallback, 2 cache, 1 deterministic; mixed quality
+   *              scores and provider latencies.
+   * Scale:       1 event, 7 answers.
+   * Duration:    Single query.
+   * Expected:    All figures derive from the rows: providerAttempts leaves out
+   *              cache/deterministic; failure rate = fallbacks / attempts.
+   */
+  it("aggregates mode mix, provider failure rate, cost, quality, latency from real rows", async () => {
+    // One entry per answer; ids and createdAt values are spelled out per row.
+    const fixtures: Array<[string, Parameters<typeof telAnswer>[2]]> = [
+      ["provider", { score: 100, costCents: 0.36, providerMs: 4000, createdAt: 1 }],
+      ["provider", { score: 90, costCents: 0.30, providerMs: 5000, liveSearches: 1, createdAt: 2 }],
+      ["provider", { score: 80, costCents: 0.40, providerMs: 6000, createdAt: 3 }],
+      ["provider_fallback", { score: 70, passed: false, costCents: 0, providerMs: null, createdAt: 4 }],
+      ["cache", { score: 100, costCents: 0, providerMs: null, createdAt: 5 }],
+      ["cache", { score: 95, costCents: 0, providerMs: null, createdAt: 6 }],
+      ["deterministic", { score: 85, costCents: 0, providerMs: null, createdAt: 7 }],
+    ];
+    const tables: Tables = {
+      liveEvents: [baseEvent()],
+      liveEventAnswers: fixtures.map(([mode, opts], idx) => telAnswer(`liveEventAnswers:${idx + 1}`, mode, opts)),
+    };
+    const t = await getAskTelemetry._handler(createCtx(tables), { eventId: "liveEvents:1" });
+
+    // Window size and mode mix.
+    expect(t.total).toBe(7);
+    expect(t.capped).toBe(false);
+    expect(t.modes).toEqual({ provider: 3, cache: 2, deterministic: 1, provider_fallback: 1 });
+
+    // Provider health: 3 provider + 1 fallback attempts (cache/deterministic excluded).
+    expect(t.providerAttempts).toBe(4);
+    expect(t.providerFailureRate).toBe(0.25); // 1 fallback / 4 attempts
+
+    // Quality.
+    expect(t.qualityPassRate).toBeCloseTo(6 / 7, 3); // 6 of 7 passed
+    expect(t.avgQualityScore).toBe(89); // round((100+90+80+70+100+95+85)/7)
+
+    // Cost, latency, live searches.
+    expect(t.totalCostCents).toBe(1.06); // 0.36+0.30+0.40
+    expect(t.avgProviderLatencyMs).toBe(5000); // (4000+5000+6000)/3
+    expect(t.liveSearchCount).toBe(1);
+  });
+
+  /**
+   * Scenario:    /ask health is opened for a brand-new room nobody has asked in yet.
+   * Goal:        Never invent "0% failures" / "100% healthy" out of no data.
+   * Prior state: 0 answers.
+   * Expected:    Rates come back null (UI renders "—"); HONEST_SCORES invariant.
+   */
+  it("HONEST_SCORES: empty event → rates are null, never a fabricated 0% or 100%", async () => {
+    const emptyRoom = createCtx({ liveEvents: [baseEvent()], liveEventAnswers: [] });
+    const t = await getAskTelemetry._handler(emptyRoom, { eventId: "liveEvents:1" });
+
+    expect(t.total).toBe(0);
+    expect(t.providerAttempts).toBe(0);
+    expect(t.providerFailureRate).toBeNull();
+    expect(t.qualityPassRate).toBeNull();
+    expect(t.avgQualityScore).toBeNull();
+    expect(t.avgProviderLatencyMs).toBeNull();
+    expect(t.totalCostCents).toBe(0);
+  });
+
+  /**
+   * Scenario:    A busy room has piled up more answers than the scan cap allows.
+   * Goal:        BOUND — the scan stays bounded and the truncation is flagged.
+   * Prior state: 30 answers; limit 10.
+   * Expected:    total=10, capped=true (so the UI can say "last 10 of many").
+   */
+  it("BOUND: caps the scan and flags capped=true at the limit", async () => {
+    const many: TableRecord[] = [];
+    for (let i = 0; i < 30; i += 1) {
+      many.push(telAnswer(`liveEventAnswers:${i}`, "cache", { createdAt: i }));
+    }
+    const ctx = createCtx({ liveEvents: [baseEvent()], liveEventAnswers: many });
+    const t = await getAskTelemetry._handler(ctx, { eventId: "liveEvents:1", limit: 10 });
+
+    expect(t.total).toBe(10);
+    expect(t.capped).toBe(true);
+  });
+});
+
 /* ========================================================================== */
 /* Test 2 — publishWiki host gate                                              */
 /* ========================================================================== */

@@ -903,6 +903,78 @@ export const getAnswers = query({
   },
 });
 
+/**
+ * /ask operability telemetry (PR C) — a bounded, read-only aggregate over an
+ * event's answers, for launch-ops + host visibility into the /ask pipeline:
+ * mode mix, PROVIDER FAILURE RATE (the headline degraded-health signal),
+ * quality pass rate, cost, and provider latency.
+ *
+ * Honesty (agentic_reliability):
+ *   - BOUND: the scan reads at most `cap` rows in descending `by_event_time`
+ *     index order, where `cap` is `limit` floored and clamped to 1..1000
+ *     (default 500; a missing or non-finite `limit` also falls back to 500).
+ *     `capped` is true whenever the scan came back full, which includes the
+ *     case where the event holds exactly `cap` rows.
+ *   - HONEST_SCORES: every number is computed from real rows. A row without a
+ *     numeric evaluation score, or a provider step without a numeric duration,
+ *     is left out of that average rather than counted as 0. Rates and averages
+ *     are null (not a fake 0/100) when there's no denominator — the UI must
+ *     show "—", never a fabricated "100% healthy".
+ *   - No private data: liveEventAnswers are public; never touches userNotes.
+ *
+ * @param eventId - Event whose answers are aggregated.
+ * @param limit - Optional scan size; see BOUND above.
+ * @returns `total` (rows scanned), `capped`, `modes` (count per known
+ *   agentMode; rows with no agentMode count as "deterministic", unknown modes
+ *   are not tallied), `providerAttempts`, `providerFailureRate` (3 dp),
+ *   `qualityPassRate` (3 dp), `avgQualityScore` (integer), `totalCostCents`
+ *   (4 dp), `avgProviderLatencyMs` (integer), `liveSearchCount`.
+ */
+export const getAskTelemetry = query({
+  args: { eventId: v.id("liveEvents"), limit: v.optional(v.number()) },
+  handler: async (ctx, { eventId, limit }) => {
+    // BOUND: normalise first so NaN/±Infinity/fractions never reach take().
+    const requested = typeof limit === "number" && Number.isFinite(limit) ? Math.floor(limit) : 500;
+    const cap = Math.min(Math.max(requested, 1), 1000);
+    const rows = await ctx.db
+      .query("liveEventAnswers")
+      .withIndex("by_event_time", (q) => q.eq("eventId", eventId))
+      .order("desc")
+      .take(cap);
+
+    const modes = { provider: 0, cache: 0, deterministic: 0, provider_fallback: 0 };
+    let costCentsTotal = 0;
+    let liveSearchCount = 0;
+    let evaluatedCount = 0; // rows carrying an evaluation → pass-rate denominator
+    let passCount = 0;
+    let scoreSum = 0;
+    let scoreCount = 0; // rows whose evaluation has a numeric score → average denominator
+    let providerLatencySum = 0;
+    let providerLatencyCount = 0;
+
+    for (const r of rows) {
+      const mode = (r.agentMode ?? "deterministic") as keyof typeof modes;
+      // Own-key check: `in` would also accept inherited names such as
+      // "toString" and then corrupt `modes` with a non-numeric entry.
+      if (Object.prototype.hasOwnProperty.call(modes, mode)) modes[mode] += 1;
+      costCentsTotal += r.estimatedCostCents ?? 0;
+      liveSearchCount += r.externalSearches ?? 0;
+      if (r.evaluation) {
+        evaluatedCount += 1;
+        if (r.evaluation.passed) passCount += 1;
+        // HONEST_SCORES: a missing score is "unknown", not a score of 0.
+        if (typeof r.evaluation.score === "number") {
+          scoreSum += r.evaluation.score;
+          scoreCount += 1;
+        }
+      }
+      const provStep = (r.trace ?? []).find(
+        (s: any) => s.step === "provider_llm" && s.status === "ok",
+      );
+      // HONEST_SCORES: an ok step with no recorded duration is not a 0 ms call.
+      if (provStep && typeof provStep.durationMs === "number") {
+        providerLatencySum += provStep.durationMs;
+        providerLatencyCount += 1;
+      }
+    }
+
+    // Provider failure rate = fallbacks / (real provider ATTEMPTS). A provider
+    // attempt is a success (mode=provider) OR a fallback (mode=provider_fallback);
+    // cache/deterministic never reached the provider, so they're excluded from
+    // the denominator. Null when no attempts — no fabricated "0% failures".
+    const providerAttempts = modes.provider + modes.provider_fallback;
+    const round = (x: number, p: number) => Math.round(x * 10 ** p) / 10 ** p;
+    return {
+      total: rows.length,
+      capped: rows.length >= cap,
+      modes,
+      providerAttempts,
+      providerFailureRate: providerAttempts > 0 ? round(modes.provider_fallback / providerAttempts, 3) : null,
+      qualityPassRate: evaluatedCount > 0 ? round(passCount / evaluatedCount, 3) : null,
+      avgQualityScore: scoreCount > 0 ? Math.round(scoreSum / scoreCount) : null,
+      totalCostCents: round(costCentsTotal, 4),
+      avgProviderLatencyMs: providerLatencyCount > 0 ? Math.round(providerLatencySum / providerLatencyCount) : null,
+      liveSearchCount,
+    };
+  },
+});
+
 export const getHostStatus = query({
   args: {
     eventId: v.id("liveEvents"),