diff --git a/packages/core/test/deepinfra.test.ts b/packages/core/test/deepinfra.test.ts
new file mode 100644
index 000000000..46833b8ac
--- /dev/null
+++ b/packages/core/test/deepinfra.test.ts
@@ -0,0 +1,88 @@
+import { expect, test } from "bun:test";
+import path from "node:path";
+
+import { generate } from "../src/index.js";
+
+test("DeepInfra models expose only verified reasoning controls", async () => { // checks every DeepInfra model returned by generate() against the fixed id lists below
+  const root = path.join(import.meta.dirname, "..", "..", ".."); // three directory levels above this test file's directory
+  const deepinfra = (await generate(path.join(root, "providers"))).deepinfra; // generate() is run on <root>/providers; only its `deepinfra` entry is inspected
+
+  expect(deepinfra).toBeDefined(); // the remaining checks read deepinfra.models, so assert the entry exists first
+
+  const toggle = [ // ids expected to carry exactly [{ type: "toggle" }]
+    "XiaomiMiMo/MiMo-V2.5-Pro",
+    "XiaomiMiMo/MiMo-V2.5",
+    "deepseek-ai/DeepSeek-V3.2",
+    "google/gemma-4-26B-A4B-it",
+    "google/gemma-4-31B-it",
+    "moonshotai/Kimi-K2.6",
+    "zai-org/GLM-4.6",
+    "zai-org/GLM-4.7",
+    "zai-org/GLM-5.1",
+    "zai-org/GLM-5",
+  ];
+  const standardEffort = ["openai/gpt-oss-120b", "openai/gpt-oss-20b"]; // ids expected to carry a single effort entry with low/medium/high
+  const r1 = ["deepseek-ai/DeepSeek-R1-0528"]; // expected to carry a toggle entry followed by effort low/medium/high
+  const v4 = [ // ids expected to carry a toggle entry followed by an effort entry whose only value is "high"
+    "deepseek-ai/DeepSeek-V4-Flash",
+    "deepseek-ai/DeepSeek-V4-Pro",
+  ];
+  const nonReasoning = [ // ids expected to have reasoning === false and no reasoning_options
+    "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+    "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  ];
+  const unresolved = [ // ids expected to have reasoning === true while reasoning_options stays undefined
+    "MiniMaxAI/MiniMax-M2.5",
+    "Qwen/Qwen3.5-35B-A3B",
+    "Qwen/Qwen3.5-397B-A17B",
+    "Qwen/Qwen3.6-35B-A3B",
+    "moonshotai/Kimi-K2.5",
+    "zai-org/GLM-4.7-Flash",
+  ];
+
+  expect(Object.keys(deepinfra?.models ?? {}).sort()).toEqual( // the model ids must be exactly the union of the six lists; both sides are sorted so order does not matter
+    [
+      ...toggle,
+      ...standardEffort,
+      ...r1,
+      ...v4,
+      ...nonReasoning,
+      ...unresolved,
+    ].sort(),
+  );
+
+  for (const id of toggle) { // toggle-only models
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([{ type: "toggle" }]);
+  }
+  for (const id of standardEffort) { // effort-only models
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "effort", values: ["low", "medium", "high"] },
+    ]);
+  }
+  for (const id of r1) { // toggle + three effort levels; toEqual on arrays also fixes the entry order
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "toggle" },
+      { type: "effort", values: ["low", "medium", "high"] },
+    ]);
+  }
+  for (const id of v4) { // toggle + a single "high" effort level
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "toggle" },
+      { type: "effort", values: ["high"] },
+    ]);
+  }
+  for (const id of nonReasoning) { // reasoning must be off and no options may be present
+    expect(deepinfra?.models[id]?.reasoning).toBe(false);
+    expect(deepinfra?.models[id]?.reasoning_options).toBeUndefined();
+  }
+  for (const id of unresolved) { // reasoning is on, but no reasoning_options may be present
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toBeUndefined();
+  }
+});
diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml
index 0a46f9656..0c49851d3 100644
--- a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml
+++ b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml
@@ -5,20 +5,19 @@ release_date = "2026-02-12"
 last_updated = "2026-02-12"
 attachment = false
 reasoning = true
-reasoning_options = []
 temperature = true
 tool_call = true
 knowledge = "2025-06"
 open_weights = true
 
 [cost]
-input = 0.27
-output = 0.95
+input = 0.15
+output = 1.15
 cache_read = 0.03
 cache_write = 0.375
 
 [limit]
-context = 204_800
+context = 196_608
 output = 131_072
 
 [modalities]
diff --git
a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml index d8cedbb09..44dd1e436 100644 --- a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml @@ -4,7 +4,6 @@ release_date = "2025-07-23" last_updated = "2025-07-23" attachment = false reasoning = false -reasoning_options = [] temperature = true knowledge = "2025-04" tool_call = true @@ -12,7 +11,7 @@ open_weights = true [cost] input = 0.3 -output = 1.2 +output = 1.0 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml index 772dd6d1e..3475b9525 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml @@ -4,16 +4,15 @@ release_date = "2026-02-01" last_updated = "2026-04-20" attachment = true reasoning = true -reasoning_options = [{ type = "toggle" }] temperature = true knowledge = "2025-01" tool_call = true open_weights = true [cost] -input = 0.2 -output = 0.95 -cached = 0.1 +input = 0.14 +output = 1.00 +cached = 0.05 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml index 8a7c912a5..10c8fa71c 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml @@ -4,16 +4,15 @@ release_date = "2026-02-01" last_updated = "2026-04-20" attachment = true reasoning = true -reasoning_options = [{ type = "toggle" }] temperature = true knowledge = "2025-01" tool_call = true open_weights = true [cost] -input = 0.54 -output = 3.4 -cached = 0.27 +input = 0.45 +output = 3.00 +cached = 0.22 [limit] context = 262_144 diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml 
b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml index 6208ed020..5c1c94a60 100644 --- a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml +++ b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml @@ -4,14 +4,13 @@ release_date = "2026-04-01" last_updated = "2026-04-01" attachment = true reasoning = true -reasoning_options = [{ type = "toggle" }] temperature = true tool_call = true open_weights = true [cost] -input = 0.20 -output = 1.00 +input = 0.15 +output = 0.95 [limit] context = 262_144 diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml index bd080cd58..7e9e7b96e 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml @@ -3,10 +3,10 @@ release_date = "2025-05-28" last_updated = "2025-05-28" attachment = false reasoning = true -reasoning_options = [] +reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high"] }] temperature = true knowledge = "2024-07" -tool_call = false +tool_call = true open_weights = false [interleaved] diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml index 6c708c932..462a42e56 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml +++ b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml @@ -1,5 +1,5 @@ base_model = "deepseek/deepseek-v4-flash" -reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high", "xhigh"] }] +reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["high"] }] [interleaved] field = "reasoning_content" diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml index fb50a1e0f..d576e311d 100644 --- a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml +++ 
b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml @@ -1,5 +1,5 @@ base_model = "deepseek/deepseek-v4-pro" -reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high", "xhigh"] }] +reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["high"] }] [interleaved] field = "reasoning_content" diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml index e2ca7401f..df433558b 100644 --- a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml +++ b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml @@ -4,7 +4,6 @@ release_date = "2024-12-06" last_updated = "2024-12-06" attachment = false reasoning = false -reasoning_options = [] tool_call = true open_weights = true diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml index 98ca558fc..e56d05fac 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml @@ -4,8 +4,7 @@ release_date = "2025-04-05" last_updated = "2025-04-05" attachment = false reasoning = false -reasoning_options = [] -tool_call = true +tool_call = false open_weights = true [cost] @@ -13,7 +12,7 @@ input = 0.15 output = 0.60 [limit] -context = 1_000_000 +context = 1_048_576 output = 16_384 [modalities] diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml index 0bcfdc0c0..12d7d50db 100644 --- a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml +++ b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml @@ -4,16 +4,15 @@ release_date = "2025-04-05" last_updated = 
"2025-04-05" attachment = false reasoning = false -reasoning_options = [] tool_call = true open_weights = true [cost] -input = 0.08 +input = 0.10 output = 0.30 [limit] -context = 10_000_000 +context = 327_680 output = 16_384 [modalities] diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml index 17a816502..e264128e6 100644 --- a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml +++ b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml @@ -4,7 +4,6 @@ release_date = "2026-01-27" last_updated = "2026-01-27" attachment = true reasoning = true -reasoning_options = [{ type = "toggle" }] structured_output = true temperature = true tool_call = true @@ -12,9 +11,9 @@ knowledge = "2025-01" open_weights = true [cost] -input = 0.50 -output = 2.80 -cached_input = 0.09 +input = 0.45 +output = 2.25 +cached_input = 0.07 [limit] context = 262_144 diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml index 6da7ba780..734ae2d0f 100644 --- a/providers/deepinfra/models/openai/gpt-oss-120b.toml +++ b/providers/deepinfra/models/openai/gpt-oss-120b.toml @@ -12,8 +12,8 @@ tool_call = true open_weights = true [cost] -input = 0.05 -output = 0.24 +input = 0.039 +output = 0.19 [limit] context = 131_072 diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml index 614f0d683..1485de47a 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.6.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.6.toml @@ -20,7 +20,7 @@ output = 1.74 cache_read = 0.08 [limit] -context = 204_800 +context = 202_752 output = 131_072 [modalities] diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml index b64fc596e..dbb42a2d8 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml @@ -6,7 +6,6 
@@ last_updated = "2026-01-19" knowledge = "2025-04" attachment = false reasoning = true -reasoning_options = [{ type = "toggle" }] temperature = true tool_call = true open_weights = true diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml index 0291e9572..f90cf3741 100644 --- a/providers/deepinfra/models/zai-org/GLM-4.7.toml +++ b/providers/deepinfra/models/zai-org/GLM-4.7.toml @@ -15,7 +15,7 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.43 +input = 0.40 output = 1.75 cache_read = 0.08 diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml index b2624d58a..8ec261580 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.1.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.1.toml @@ -16,9 +16,9 @@ structured_output = true field = "reasoning_content" [cost] -input = 1.4 -output = 4.4 -cache_read = 0.26 +input = 1.05 +output = 3.50 +cache_read = 0.205 [limit] context = 202_752 diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml index 595b286fc..366f5fe88 100644 --- a/providers/deepinfra/models/zai-org/GLM-5.toml +++ b/providers/deepinfra/models/zai-org/GLM-5.toml @@ -15,9 +15,9 @@ open_weights = true field = "reasoning_content" [cost] -input = 0.8 -output = 2.56 -cache_read = 0.16 +input = 0.60 +output = 2.08 +cache_read = 0.12 [limit] context = 202_752