Skip to content

Commit 8540cdb

Browse files
feat(web): add language model inputModalities capability plumbing
Add an optional `inputModalities` declaration to language model config and expose a resolved capability set to the client.

- Schema: add optional `inputModalities` (`text` | `image` | `pdf`) to every provider definition in `schemas/v3/languageModel.json` and regenerate the schema types/snippets.
- Add a fail-closed `resolveModelInputModalities` resolver that defaults to text-only when a model does not declare its input modalities.
- Expose the resolved `inputModalities` on the client-safe `LanguageModelInfo` (populated via `getConfiguredLanguageModelsInfo` and the MCP ask path).

This is groundwork for chat file attachments. It adds no attachment UI and no live provider capability probing yet.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 5e1b8ee commit 8540cdb

11 files changed

Lines changed: 1306 additions & 1 deletion

File tree

docs/snippets/schemas/v3/index.schema.mdx

Lines changed: 264 additions & 0 deletions
Large diffs are not rendered by default.

docs/snippets/schemas/v3/languageModel.schema.mdx

Lines changed: 264 additions & 0 deletions
Large diffs are not rendered by default.

packages/schemas/src/v3/index.schema.ts

Lines changed: 264 additions & 0 deletions
Large diffs are not rendered by default.

packages/schemas/src/v3/index.type.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,10 @@ export interface AmazonBedrockLanguageModel {
762762
*/
763763
temperature?: number;
764764
headers?: LanguageModelHeaders;
765+
/**
766+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
767+
*/
768+
inputModalities?: ("text" | "image" | "pdf")[];
765769
}
766770
/**
767771
* Optional headers to use with the model.
@@ -842,6 +846,10 @@ export interface AnthropicLanguageModel {
842846
*/
843847
temperature?: number;
844848
headers?: LanguageModelHeaders;
849+
/**
850+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
851+
*/
852+
inputModalities?: ("text" | "image" | "pdf")[];
845853
}
846854
export interface AzureLanguageModel {
847855
/**
@@ -897,6 +905,10 @@ export interface AzureLanguageModel {
897905
*/
898906
temperature?: number;
899907
headers?: LanguageModelHeaders;
908+
/**
909+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
910+
*/
911+
inputModalities?: ("text" | "image" | "pdf")[];
900912
}
901913
export interface DeepSeekLanguageModel {
902914
/**
@@ -936,6 +948,10 @@ export interface DeepSeekLanguageModel {
936948
*/
937949
temperature?: number;
938950
headers?: LanguageModelHeaders;
951+
/**
952+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
953+
*/
954+
inputModalities?: ("text" | "image" | "pdf")[];
939955
}
940956
export interface GoogleGenerativeAILanguageModel {
941957
/**
@@ -983,6 +999,10 @@ export interface GoogleGenerativeAILanguageModel {
983999
*/
9841000
temperature?: number;
9851001
headers?: LanguageModelHeaders;
1002+
/**
1003+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1004+
*/
1005+
inputModalities?: ("text" | "image" | "pdf")[];
9861006
}
9871007
export interface GoogleVertexAnthropicLanguageModel {
9881008
/**
@@ -1030,6 +1050,10 @@ export interface GoogleVertexAnthropicLanguageModel {
10301050
*/
10311051
temperature?: number;
10321052
headers?: LanguageModelHeaders;
1053+
/**
1054+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1055+
*/
1056+
inputModalities?: ("text" | "image" | "pdf")[];
10331057
}
10341058
export interface GoogleVertexLanguageModel {
10351059
/**
@@ -1085,6 +1109,10 @@ export interface GoogleVertexLanguageModel {
10851109
*/
10861110
temperature?: number;
10871111
headers?: LanguageModelHeaders;
1112+
/**
1113+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1114+
*/
1115+
inputModalities?: ("text" | "image" | "pdf")[];
10881116
}
10891117
export interface MistralLanguageModel {
10901118
/**
@@ -1124,6 +1152,10 @@ export interface MistralLanguageModel {
11241152
*/
11251153
temperature?: number;
11261154
headers?: LanguageModelHeaders;
1155+
/**
1156+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1157+
*/
1158+
inputModalities?: ("text" | "image" | "pdf")[];
11271159
}
11281160
export interface OpenAILanguageModel {
11291161
/**
@@ -1171,6 +1203,10 @@ export interface OpenAILanguageModel {
11711203
*/
11721204
temperature?: number;
11731205
headers?: LanguageModelHeaders;
1206+
/**
1207+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1208+
*/
1209+
inputModalities?: ("text" | "image" | "pdf")[];
11741210
}
11751211
export interface OpenAICompatibleLanguageModel {
11761212
/**
@@ -1215,6 +1251,10 @@ export interface OpenAICompatibleLanguageModel {
12151251
* Optional temperature setting to use with the model.
12161252
*/
12171253
temperature?: number;
1254+
/**
1255+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1256+
*/
1257+
inputModalities?: ("text" | "image" | "pdf")[];
12181258
}
12191259
/**
12201260
* Optional query parameters to include in the request url.
@@ -1279,6 +1319,10 @@ export interface OpenRouterLanguageModel {
12791319
*/
12801320
temperature?: number;
12811321
headers?: LanguageModelHeaders;
1322+
/**
1323+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1324+
*/
1325+
inputModalities?: ("text" | "image" | "pdf")[];
12821326
}
12831327
export interface XaiLanguageModel {
12841328
/**
@@ -1318,6 +1362,10 @@ export interface XaiLanguageModel {
13181362
*/
13191363
temperature?: number;
13201364
headers?: LanguageModelHeaders;
1365+
/**
1366+
* Optional list of input modalities this model can accept. Used to gate non-text chat attachments. When omitted, the model is treated as text-only (fail-closed).
1367+
*/
1368+
inputModalities?: ("text" | "image" | "pdf")[];
13211369
}
13221370
export interface GitHubAppConfig {
13231371
/**

0 commit comments

Comments
 (0)