anomalyco · rekram1-node · Jun 10, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/packages/core/test/deepinfra.test.ts b/packages/core/test/deepinfra.test.ts
@@ -0,0 +1,88 @@
+import { expect, test } from "bun:test";
+import path from "node:path";
+
+import { generate } from "../src/index.js";
+
+test("DeepInfra models expose only verified reasoning controls", async () => { // pins reasoning metadata of every generated DeepInfra model
+  const root = path.join(import.meta.dirname, "..", "..", ".."); // three levels above this test's directory (presumably the repo root — confirm)
+  const deepinfra = (await generate(path.join(root, "providers"))).deepinfra; // generate() output is read by provider key
+  // The provider entry must exist in the generated output before its models are inspected.
+  expect(deepinfra).toBeDefined();
+  // Model ids grouped by the reasoning metadata each group is expected to carry (asserted in the loops below).
+  const toggle = [ // reasoning = true, options: a single toggle
+    "XiaomiMiMo/MiMo-V2.5-Pro",
+    "XiaomiMiMo/MiMo-V2.5",
+    "deepseek-ai/DeepSeek-V3.2",
+    "google/gemma-4-26B-A4B-it",
+    "google/gemma-4-31B-it",
+    "moonshotai/Kimi-K2.6",
+    "zai-org/GLM-4.6",
+    "zai-org/GLM-4.7",
+    "zai-org/GLM-5.1",
+    "zai-org/GLM-5",
+  ];
+  const standardEffort = ["openai/gpt-oss-120b", "openai/gpt-oss-20b"]; // reasoning = true, options: effort low/medium/high only
+  const r1 = ["deepseek-ai/DeepSeek-R1-0528"]; // reasoning = true, options: toggle plus effort low/medium/high
+  const v4 = [ // reasoning = true, options: toggle plus effort restricted to "high"
+    "deepseek-ai/DeepSeek-V4-Flash",
+    "deepseek-ai/DeepSeek-V4-Pro",
+  ];
+  const nonReasoning = [ // reasoning = false, reasoning_options absent
+    "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo",
+    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
+    "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  ];
+  const unresolved = [ // reasoning = true, but reasoning_options absent
+    "MiniMaxAI/MiniMax-M2.5",
+    "Qwen/Qwen3.5-35B-A3B",
+    "Qwen/Qwen3.5-397B-A17B",
+    "Qwen/Qwen3.6-35B-A3B",
+    "moonshotai/Kimi-K2.5",
+    "zai-org/GLM-4.7-Flash",
+  ];
+  // The union of all groups must equal the generated model ids exactly, so a model missing from every group (or listed but not generated) fails here.
+  expect(Object.keys(deepinfra?.models ?? {}).sort()).toEqual(
+    [
+      ...toggle,
+      ...standardEffort,
+      ...r1,
+      ...v4,
+      ...nonReasoning,
+      ...unresolved,
+    ].sort(),
+  );
+  // Per-group checks of the reasoning flag and the exact reasoning_options value.
+  for (const id of toggle) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([{ type: "toggle" }]);
+  }
+  for (const id of standardEffort) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "effort", values: ["low", "medium", "high"] },
+    ]);
+  }
+  for (const id of r1) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "toggle" },
+      { type: "effort", values: ["low", "medium", "high"] },
+    ]);
+  }
+  for (const id of v4) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toEqual([
+      { type: "toggle" },
+      { type: "effort", values: ["high"] },
+    ]);
+  }
+  for (const id of nonReasoning) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(false);
+    expect(deepinfra?.models[id]?.reasoning_options).toBeUndefined(); // the key must be omitted entirely, not set to an empty list
+  }
+  for (const id of unresolved) {
+    expect(deepinfra?.models[id]?.reasoning).toBe(true);
+    expect(deepinfra?.models[id]?.reasoning_options).toBeUndefined(); // reasoning-capable, yet no control options are declared
+  }
+});
diff --git a/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml b/providers/deepinfra/models/MiniMaxAI/MiniMax-M2.5.toml
@@ -5,20 +5,19 @@ release_date = "2026-02-12"
 last_updated = "2026-02-12"
 attachment = false
 reasoning = true
-reasoning_options = []
 temperature = true
 tool_call = true
 knowledge = "2025-06"
 open_weights = true
 
 [cost]
-input = 0.27
-output = 0.95
+input = 0.15
+output = 1.15
 cache_read = 0.03
 cache_write = 0.375
 
 [limit]
-context = 204_800
+context = 196_608
 output = 131_072
 
 [modalities]

diff --git a/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml b/providers/deepinfra/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo.toml
@@ -4,15 +4,14 @@ release_date = "2025-07-23"
 last_updated = "2025-07-23"
 attachment = false
 reasoning = false
-reasoning_options = []
 temperature = true
 knowledge = "2025-04"
 tool_call = true
 open_weights = true
 
 [cost]
 input = 0.3
-output = 1.2
+output = 1.0
 
 [limit]
 context = 262_144

diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-35B-A3B.toml
@@ -4,16 +4,15 @@ release_date = "2026-02-01"
 last_updated = "2026-04-20"
 attachment = true
 reasoning = true
-reasoning_options = [{ type = "toggle" }]
 temperature = true
 knowledge = "2025-01"
 tool_call = true
 open_weights = true
 
 [cost]
-input = 0.2
-output = 0.95
-cached = 0.1
+input = 0.14
+output = 1.00
+cached = 0.05
 
 [limit]
 context = 262_144

diff --git a/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml b/providers/deepinfra/models/Qwen/Qwen3.5-397B-A17B.toml
@@ -4,16 +4,15 @@ release_date = "2026-02-01"
 last_updated = "2026-04-20"
 attachment = true
 reasoning = true
-reasoning_options = [{ type = "toggle" }]
 temperature = true
 knowledge = "2025-01"
 tool_call = true
 open_weights = true
 
 [cost]
-input = 0.54
-output = 3.4
-cached = 0.27
+input = 0.45
+output = 3.00
+cached = 0.22
 
 [limit]
 context = 262_144

diff --git a/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml b/providers/deepinfra/models/Qwen/Qwen3.6-35B-A3B.toml
@@ -4,14 +4,13 @@ release_date = "2026-04-01"
 last_updated = "2026-04-01"
 attachment = true
 reasoning = true
-reasoning_options = [{ type = "toggle" }]
 temperature = true
 tool_call = true
 open_weights = true
 
 [cost]
-input = 0.20
-output = 1.00
+input = 0.15
+output = 0.95
 
 [limit]
 context = 262_144

diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-R1-0528.toml
@@ -3,10 +3,10 @@ release_date = "2025-05-28"
 last_updated = "2025-05-28"
 attachment = false
 reasoning = true
-reasoning_options = []
+reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high"] }]
 temperature = true
 knowledge = "2024-07"
-tool_call = false
+tool_call = true
 open_weights = false
 
 [interleaved]

diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Flash.toml
@@ -1,5 +1,5 @@
 base_model = "deepseek/deepseek-v4-flash"
-reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high", "xhigh"] }]
+reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["high"] }]
 
 [interleaved]
 field = "reasoning_content"

diff --git a/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml b/providers/deepinfra/models/deepseek-ai/DeepSeek-V4-Pro.toml
@@ -1,5 +1,5 @@
 base_model = "deepseek/deepseek-v4-pro"
-reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["low", "medium", "high", "xhigh"] }]
+reasoning_options = [{ type = "toggle" }, { type = "effort", values = ["high"] }]
 
 [interleaved]
 field = "reasoning_content"

diff --git a/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml b/providers/deepinfra/models/meta-llama/Llama-3.3-70B-Instruct-Turbo.toml
@@ -4,7 +4,6 @@ release_date = "2024-12-06"
 last_updated = "2024-12-06"
 attachment = false
 reasoning = false
-reasoning_options = []
 tool_call = true
 open_weights = true
 

diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml b/providers/deepinfra/models/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8.toml
@@ -4,16 +4,15 @@ release_date = "2025-04-05"
 last_updated = "2025-04-05"
 attachment = false
 reasoning = false
-reasoning_options = []
-tool_call = true
+tool_call = false
 open_weights = true
 
 [cost]
 input = 0.15
 output = 0.60
 
 [limit]
-context = 1_000_000
+context = 1_048_576
 output = 16_384
 
 [modalities]

diff --git a/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml b/providers/deepinfra/models/meta-llama/Llama-4-Scout-17B-16E-Instruct.toml
@@ -4,16 +4,15 @@ release_date = "2025-04-05"
 last_updated = "2025-04-05"
 attachment = false
 reasoning = false
-reasoning_options = []
 tool_call = true
 open_weights = true
 
 [cost]
-input = 0.08
+input = 0.10
 output = 0.30
 
 [limit]
-context = 10_000_000
+context = 327_680
 output = 16_384
 
 [modalities]

diff --git a/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml b/providers/deepinfra/models/moonshotai/Kimi-K2.5.toml
@@ -4,17 +4,16 @@ release_date = "2026-01-27"
 last_updated = "2026-01-27"
 attachment = true
 reasoning = true
-reasoning_options = [{ type = "toggle" }]
 structured_output = true
 temperature = true
 tool_call = true
 knowledge = "2025-01"
 open_weights = true
 
 [cost]
-input = 0.50
-output = 2.80
-cached_input = 0.09
+input = 0.45
+output = 2.25
+cached_input = 0.07
 
 [limit]
 context = 262_144

diff --git a/providers/deepinfra/models/openai/gpt-oss-120b.toml b/providers/deepinfra/models/openai/gpt-oss-120b.toml
@@ -12,8 +12,8 @@ tool_call = true
 open_weights = true
 
 [cost]
-input = 0.05
-output = 0.24
+input = 0.039
+output = 0.19
 
 [limit]
 context = 131_072

diff --git a/providers/deepinfra/models/zai-org/GLM-4.6.toml b/providers/deepinfra/models/zai-org/GLM-4.6.toml
@@ -20,7 +20,7 @@ output = 1.74
 cache_read = 0.08
 
 [limit]
-context = 204_800
+context = 202_752
 output = 131_072
 
 [modalities]

diff --git a/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml b/providers/deepinfra/models/zai-org/GLM-4.7-Flash.toml
@@ -6,7 +6,6 @@ last_updated = "2026-01-19"
 knowledge = "2025-04"
 attachment = false
 reasoning = true
-reasoning_options = [{ type = "toggle" }]
 temperature = true
 tool_call = true
 open_weights = true

diff --git a/providers/deepinfra/models/zai-org/GLM-4.7.toml b/providers/deepinfra/models/zai-org/GLM-4.7.toml
@@ -15,7 +15,7 @@ open_weights = true
 field = "reasoning_content"
 
 [cost]
-input = 0.43
+input = 0.40
 output = 1.75
 cache_read = 0.08
 

diff --git a/providers/deepinfra/models/zai-org/GLM-5.1.toml b/providers/deepinfra/models/zai-org/GLM-5.1.toml
@@ -16,9 +16,9 @@ structured_output = true
 field = "reasoning_content"
 
 [cost]
-input = 1.4
-output = 4.4
-cache_read = 0.26
+input = 1.05
+output = 3.50
+cache_read = 0.205
 
 [limit]
 context = 202_752 

diff --git a/providers/deepinfra/models/zai-org/GLM-5.toml b/providers/deepinfra/models/zai-org/GLM-5.toml
@@ -15,9 +15,9 @@ open_weights = true
 field = "reasoning_content"
 
 [cost]
-input = 0.8
-output = 2.56
-cache_read = 0.16
+input = 0.60
+output = 2.08
+cache_read = 0.12
 
 [limit]
 context = 202_752