@@ -13,8 +13,10 @@ const FETCH_TIMEOUT_MS = 8000;
1313// Re-fetch the (~2.4 MB) catalog at most once per TTL interval per server
1414// process. New models trickle in daily; a stale window of a few hours is fine.
1515const CATALOG_TTL_MS = 6 * 60 * 60 * 1000 ;
16- // After a failed fetch, don't reattempt for this long. Without it, an outage in
17- // models.dev would make every chat send pay the fetch timeout on the request path.
16+ // After a failed fetch, don't reattempt for this long. Since the request path
17+ // never blocks on the fetch (see loadCatalog), this throttles background
18+ // refresh attempts to once per interval during a models.dev outage instead of
19+ // kicking one off on (nearly) every request.
1820const NEGATIVE_CACHE_MS = 60 * 1000 ;
1921
2022// Sourcebot provider id -> models.dev top-level catalog key. Only providers
@@ -75,9 +77,16 @@ const fetchCatalog = async (): Promise<ModelsDevCatalog | null> => {
7577
7678/**
7779 * Returns the cached models.dev catalog, refreshing it in the background when
78- * stale. Only the very first load blocks on the network; thereafter the
79- * last-known-good catalog is served immediately (even if stale) so the request
80- * path never waits on models.dev.
80+ * stale. The request path NEVER blocks on the network: the last-known-good
81+ * catalog is returned immediately (even if stale), or null before the first
82+ * successful fetch lands, and any refresh settles in the background.
83+ *
84+ * Consequences of never awaiting:
85+ * - For the brief window after a cold start (before the first fetch resolves),
86+ * capability resolution falls back to text-only; it self-heals on the next
87+ * request once the background fetch populates the cache.
88+ * - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the
89+ * request path instead of repeatedly paying the fetch timeout.
8190 */
8291export const loadCatalog = async ( ) : Promise < ModelsDevCatalog | null > => {
8392 const now = Date . now ( ) ;
@@ -87,7 +96,8 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
8796 // Kick off a (deduped) refresh when the cache is stale/empty and we're not
8897 // within the post-failure backoff window. On success it replaces the cache;
8998 // on failure it only records the failure time, leaving the last-known-good
90- // catalog intact.
99+ // catalog intact. The promise is intentionally not awaited here so the
100+ // request path never waits on models.dev.
91101 if ( ! isFresh && ! isBackingOff && ! inFlightFetch ) {
92102 inFlightFetch = fetchCatalog ( ) . then ( ( catalog ) => {
93103 if ( catalog ) {
@@ -101,11 +111,7 @@ export const loadCatalog = async (): Promise<ModelsDevCatalog | null> => {
101111 } ) ;
102112 }
103113
104- // Once a catalog has loaded once, never block the request path on the
105- // network: serve the last-known-good value (even if stale) and let any
106- // refresh settle in the background. Only the very first load awaits.
107- if ( cachedCatalog !== null ) {
108- return cachedCatalog ;
109- }
110- return inFlightFetch ?? null ;
114+ // Serve whatever we currently have cached (possibly null on a cold start)
115+ // and let any in-flight refresh settle in the background.
116+ return cachedCatalog ;
111117} ;
0 commit comments