remove blocking models.dev catalog request and add cache warm on startup

whoisthey · whoisthey · commit dbcfc8a60364 · 2026-06-27T13:28:33.000-07:00
diff --git a/packages/web/src/features/chat/modelCapabilities.server.test.ts b/packages/web/src/features/chat/modelCapabilities.server.test.ts
@@ -104,17 +104,28 @@ describe('resolveModelCapabilities', () => {
         vi.unstubAllGlobals();
     });
 
-    test('fetches the catalog once and resolves capabilities (incl. provider mapping)', async () => {
+    test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => {
         const fetchMock = vi.fn(async () => ({
             ok: true,
             json: async () => catalog,
         }) as unknown as Response);
         vi.stubGlobal('fetch', fetchMock);
 
+        // The request path never blocks on the fetch: the first lookup kicks off
+        // the background fetch and falls back to text-only while it's in flight.
         expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
-            inputModalities: ['text', 'image'],
-            supportedDocumentTypes: ['pdf'],
+            inputModalities: ['text'],
+            supportedDocumentTypes: [],
         });
+
+        // Once the background fetch settles, lookups resolve from the cached catalog.
+        await vi.waitFor(async () => {
+            expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
+                inputModalities: ['text', 'image'],
+                supportedDocumentTypes: ['pdf'],
+            });
+        });
+
         // Subsequent lookups reuse the cached catalog rather than refetching.
         expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
             inputModalities: ['text', 'image', 'audio', 'video'],
diff --git a/packages/web/src/features/chat/modelsDevCatalog.server.ts b/packages/web/src/features/chat/modelsDevCatalog.server.ts
@@ -13,8 +13,10 @@ const FETCH_TIMEOUT_MS = 8000;
 // Re-fetch the (~2.4 MB) catalog at most once per this interval per server
 // process. New models trickle in daily; a stale window for a few hours is fine.
 const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
-// After a failed fetch, don't reattempt for this long. Without it, an outage in
-// models.dev would make every chat send pay the fetch timeout on the request path.
+// After a failed fetch, don't reattempt for this long. Since the request path
+// never blocks on the fetch (see loadCatalog), this throttles background
+// refresh attempts to once per interval during a models.dev outage instead of
+// kicking one off on (nearly) every request.
 const NEGATIVE_CACHE_MS = 60 * 1000;
 
 // Sourcebot provider id -> models.dev top-level catalog key. Only providers
@@ -75,9 +77,16 @@ const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
 
 /**
  * Returns the cached models.dev catalog, refreshing it in the background when
- * stale. Only the very first load blocks on the network; thereafter the
- * last-known-good catalog is served immediately (even if stale) so the request
- * path never waits on models.dev.
+ * stale. The request path NEVER blocks on the network: the last-known-good
+ * catalog is returned immediately (even if stale), or null before the first
+ * successful fetch lands, and any refresh settles in the background.
+ *
+ * Consequences of never awaiting:
+ * - For the brief window after a cold start (before the first fetch resolves),
+ *   capability resolution falls back to text-only; it self-heals on the first
+ *   request after the background fetch populates the cache.
+ * - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the
+ *   request path instead of repeatedly paying the fetch timeout.
  */
 export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
     const now = Date.now();
@@ -87,7 +96,8 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
     // Kick off a (deduped) refresh when the cache is stale/empty and we're not
     // within the post-failure backoff window. On success it replaces the cache;
     // on failure it only records the failure time, leaving the last-known-good
-    // catalog intact.
+    // catalog intact. The promise is intentionally not awaited here so the
+    // request path never waits on models.dev.
     if (!isFresh && !isBackingOff && !inFlightFetch) {
         inFlightFetch = fetchCatalog().then((catalog) => {
             if (catalog) {
@@ -101,11 +111,7 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
         });
     }
 
-    // Once a catalog has loaded once, never block the request path on the
-    // network: serve the last-known-good value (even if stale) and let any
-    // refresh settle in the background. Only the very first load awaits.
-    if (cachedCatalog !== null) {
-        return cachedCatalog;
-    }
-    return inFlightFetch ?? null;
+    // Serve whatever we currently have cached (possibly null on a cold start)
+    // and let any in-flight refresh settle in the background.
+    return cachedCatalog;
 };
diff --git a/packages/web/src/initialize.ts b/packages/web/src/initialize.ts
@@ -4,6 +4,8 @@ import { startChangelogPollingJob } from '@/features/changelog/pollChangelog';
 import { createLogger, env } from "@sourcebot/shared";
 import { hasEntitlement } from '@/lib/entitlements';
 import { SINGLE_TENANT_ORG_ID } from './lib/constants';
+import { getConfiguredLanguageModels } from '@/features/chat/utils.server';
+import { loadCatalog } from '@/features/chat/modelsDevCatalog.server';
 
 const logger = createLogger('web-initialize');
 
@@ -73,8 +75,18 @@ const init = async () => {
     }
 }
 
+const warmModelCapabilitiesCatalog = async () => { // startup warm-up: calls loadCatalog() so a due background catalog fetch starts before the first request needs it
+    const configuredModels = await getConfiguredLanguageModels();
+    if (configuredModels.length === 0) { // no configured language models: skip the warm-up (and the fetch) entirely
+        return;
+    }
+    logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`);
+    void loadCatalog(); // fire-and-forget: loadCatalog() returns the current cache without awaiting the fetch, so its result is deliberately discarded
+};
+
 (async () => {
     await init();
     startServicePingCronJob();
     startChangelogPollingJob();
+    await warmModelCapabilitiesCatalog(); // waits only for the configured-model lookup; the catalog fetch itself settles in the background
 })();