From ed237fe1affe1312a97543504388cf1a14bf26e1 Mon Sep 17 00:00:00 2001
From: Olocool17 <22843298+Olocool17@users.noreply.github.com>
Date: Thu, 11 Dec 2025 13:38:29 +0000
Subject: [PATCH 1/2] fix: structured outputs when using responses model

---
 dspy/clients/lm.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
index f4885d470e..9df61ae035 100644
--- a/dspy/clients/lm.py
+++ b/dspy/clients/lm.py
@@ -6,6 +6,7 @@
 from typing import Any, Literal, cast
 
 import litellm
+import pydantic
 from anyio.streams.memory import MemoryObjectSendStream
 from asyncer import syncify
 
@@ -501,6 +502,12 @@ def _convert_chat_request_to_responses_request(request: dict[str, Any]):
     # Convert `response_format` to `text.format` for Responses API
     if "response_format" in request:
         response_format = request.pop("response_format")
+        if isinstance(response_format, type) and issubclass(response_format, pydantic.BaseModel):
+            response_format = {
+                "name": response_format.__name__,
+                "type": "json_schema",
+                "schema": response_format.model_json_schema(),
+            }
         text = request.pop("text", {})
         request["text"] = {**text, "format": response_format}
 

From 5349ce25b3cae86398c0ec1c299634e028dd3abb Mon Sep 17 00:00:00 2001
From: Olocool17 <22843298+Olocool17@users.noreply.github.com>
Date: Thu, 11 Dec 2025 13:46:41 +0000
Subject: [PATCH 2/2] fix: cache retrieval error when using responses model

---
 dspy/clients/cache.py | 1 -
 dspy/clients/lm.py    | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/dspy/clients/cache.py b/dspy/clients/cache.py
index 4773306ccc..7ed1fafd94 100644
--- a/dspy/clients/cache.py
+++ b/dspy/clients/cache.py
@@ -122,7 +122,6 @@ def get(self, request: dict[str, Any], ignored_args_for_cache_key: list[str] | N
         if hasattr(response, "usage"):
             # Clear the usage data when cache is hit, because no LM call is made
             response.usage = {}
-            response.cache_hit = True
         return response
 
     def put(
diff --git a/dspy/clients/lm.py b/dspy/clients/lm.py
index 9df61ae035..477272dfa6 100644
--- a/dspy/clients/lm.py
+++ b/dspy/clients/lm.py
@@ -164,7 +164,7 @@ def forward(
 
         self._check_truncation(results)
 
-        if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
+        if dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results
 
@@ -202,7 +202,7 @@ async def aforward(
 
         self._check_truncation(results)
 
-        if not getattr(results, "cache_hit", False) and dspy.settings.usage_tracker and hasattr(results, "usage"):
+        if dspy.settings.usage_tracker and hasattr(results, "usage"):
             settings.usage_tracker.add_usage(self.model, dict(results.usage))
         return results