Skip to content

Commit dbcfc8a

Browse files
committed
remove blocking models.dev catalog request and add cache warm on startup
1 parent bf79260 commit dbcfc8a

3 files changed

Lines changed: 45 additions & 16 deletions

File tree

packages/web/src/features/chat/modelCapabilities.server.test.ts

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,17 +104,28 @@ describe('resolveModelCapabilities', () => {
104104
vi.unstubAllGlobals();
105105
});
106106

107-
test('fetches the catalog once and resolves capabilities (incl. provider mapping)', async () => {
107+
test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => {
108108
const fetchMock = vi.fn(async () => ({
109109
ok: true,
110110
json: async () => catalog,
111111
}) as unknown as Response);
112112
vi.stubGlobal('fetch', fetchMock);
113113

114+
// The request path never blocks on the fetch: the first lookup kicks off
115+
// the background fetch and falls back to text-only while it's in flight.
114116
expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
115-
inputModalities: ['text', 'image'],
116-
supportedDocumentTypes: ['pdf'],
117+
inputModalities: ['text'],
118+
supportedDocumentTypes: [],
117119
});
120+
121+
// Once the background fetch settles, lookups resolve from the cached catalog.
122+
await vi.waitFor(async () => {
123+
expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({
124+
inputModalities: ['text', 'image'],
125+
supportedDocumentTypes: ['pdf'],
126+
});
127+
});
128+
118129
// Subsequent lookups reuse the cached catalog rather than refetching.
119130
expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({
120131
inputModalities: ['text', 'image', 'audio', 'video'],

packages/web/src/features/chat/modelsDevCatalog.server.ts

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ const FETCH_TIMEOUT_MS = 8000;
1313
// Re-fetch the (~2.4 MB) catalog at most once per this interval per server
1414
// process. New models trickle in daily; a stale window for a few hours is fine.
1515
const CATALOG_TTL_MS = 6 * 60 * 60 * 1000;
16-
// After a failed fetch, don't reattempt for this long. Without it, an outage in
17-
// models.dev would make every chat send pay the fetch timeout on the request path.
16+
// After a failed fetch, don't reattempt for this long. The request path never
17+
// blocks on the fetch (see loadCatalog), so this only throttles background
18+
// refresh attempts to once per interval during a models.dev outage instead of
19+
// kicking one off on (nearly) every request.
1820
const NEGATIVE_CACHE_MS = 60 * 1000;
1921

2022
// Sourcebot provider id -> models.dev top-level catalog key. Only providers
@@ -75,9 +77,16 @@ const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
7577

7678
/**
7779
* Returns the cached models.dev catalog, refreshing it in the background when
78-
* stale. Only the very first load blocks on the network; thereafter the
79-
* last-known-good catalog is served immediately (even if stale) so the request
80-
* path never waits on models.dev.
80+
* stale. The request path NEVER blocks on the network: the last-known-good
81+
* catalog is returned immediately (even if stale), or null before the first
82+
* successful fetch lands, and any refresh settles in the background.
83+
*
84+
* Consequences of never awaiting:
85+
* - For the brief window after a cold start (before the first fetch resolves),
86+
* capability resolution falls back to text-only; it self-heals on the next
87+
* request once the background fetch populates the cache.
88+
* - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the
89+
* request path instead of repeatedly paying the fetch timeout.
8190
*/
8291
export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
8392
const now = Date.now();
@@ -87,7 +96,8 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
8796
// Kick off a (deduped) refresh when the cache is stale/empty and we're not
8897
// within the post-failure backoff window. On success it replaces the cache;
8998
// on failure it only records the failure time, leaving the last-known-good
90-
// catalog intact.
99+
// catalog intact. The promise is intentionally not awaited here so the
100+
// request path never waits on models.dev.
91101
if (!isFresh && !isBackingOff && !inFlightFetch) {
92102
inFlightFetch = fetchCatalog().then((catalog) => {
93103
if (catalog) {
@@ -101,11 +111,7 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
101111
});
102112
}
103113

104-
// Once a catalog has loaded once, never block the request path on the
105-
// network: serve the last-known-good value (even if stale) and let any
106-
// refresh settle in the background. Only the very first load awaits.
107-
if (cachedCatalog !== null) {
108-
return cachedCatalog;
109-
}
110-
return inFlightFetch ?? null;
114+
// Serve whatever we currently have cached (possibly null on a cold start)
115+
// and let any in-flight refresh settle in the background.
116+
return cachedCatalog;
111117
};

packages/web/src/initialize.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import { startChangelogPollingJob } from '@/features/changelog/pollChangelog';
44
import { createLogger, env } from "@sourcebot/shared";
55
import { hasEntitlement } from '@/lib/entitlements';
66
import { SINGLE_TENANT_ORG_ID } from './lib/constants';
7+
import { getConfiguredLanguageModels } from '@/features/chat/utils.server';
8+
import { loadCatalog } from '@/features/chat/modelsDevCatalog.server';
79

810
const logger = createLogger('web-initialize');
911

@@ -73,8 +75,18 @@ const init = async () => {
7375
}
7476
}
7577

78+
const warmModelCapabilitiesCatalog = async () => {
79+
const configuredModels = await getConfiguredLanguageModels();
80+
if (configuredModels.length === 0) {
81+
return;
82+
}
83+
logger.info(`Warming models.dev capability catalog for ${configuredModels.length} configured language model(s)`);
84+
void loadCatalog();
85+
};
86+
7687
(async () => {
7788
await init();
7889
startServicePingCronJob();
7990
startChangelogPollingJob();
91+
await warmModelCapabilitiesCatalog();
8092
})();

0 commit comments

Comments
 (0)