5 changes: 5 additions & 0 deletions livekit-agents/livekit/agents/llm/fallback_adapter.py
@@ -92,6 +92,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream:
return FallbackLLMStream(
@@ -101,6 +102,7 @@ def chat(
tools=tools or [],
parallel_tool_calls=parallel_tool_calls,
tool_choice=tool_choice,
response_format=response_format,
extra_kwargs=extra_kwargs,
)

@@ -124,12 +126,14 @@ def __init__(
conn_options: APIConnectOptions,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> None:
super().__init__(llm, chat_ctx=chat_ctx, tools=tools, conn_options=conn_options)
self._fallback_adapter = llm
self._parallel_tool_calls = parallel_tool_calls
self._tool_choice = tool_choice
self._response_format = response_format
self._extra_kwargs = extra_kwargs

self._current_stream: LLMStream | None = None
@@ -164,6 +168,7 @@ async def _try_generate(
tools=self._tools,
parallel_tool_calls=self._parallel_tool_calls,
tool_choice=self._tool_choice,
response_format=self._response_format,
extra_kwargs=self._extra_kwargs,
conn_options=dataclasses.replace(
self._conn_options,
1 change: 1 addition & 0 deletions livekit-agents/livekit/agents/llm/llm.py
@@ -128,6 +128,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream: ...

9 changes: 8 additions & 1 deletion livekit-agents/livekit/agents/voice/agent.py
@@ -29,6 +29,8 @@
class ModelSettings:
tool_choice: NotGivenOr[llm.ToolChoice] = NOT_GIVEN
"""The tool choice to use when calling the LLM."""
response_format: NotGivenOr[Any] = NOT_GIVEN
Contributor:
Since not all LLMs support response_format, perhaps a better way is to use it in a custom llm_node instead of adding it to ModelSettings; here is an example.

btw, I didn't see how you want to pass the response_format to the agent from the user's code.
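
A minimal sketch of the custom-llm_node approach suggested above (hypothetical, not the linked example): StructuredAgent and MY_FORMAT are placeholder names, the import paths assume a recent livekit-agents 1.x layout, and the agent is assumed to have been constructed with an llm= instance.

from collections.abc import AsyncIterable

from livekit.agents import Agent, ModelSettings, llm

MY_FORMAT = {"type": "json_object"}  # placeholder; a JSON schema dict or pydantic model could go here


class StructuredAgent(Agent):
    async def llm_node(
        self,
        chat_ctx: llm.ChatContext,
        tools: list[llm.FunctionTool],
        model_settings: ModelSettings,
    ) -> AsyncIterable[llm.ChatChunk]:
        # Call the LLM directly instead of deferring to the default node, so
        # response_format never has to live on ModelSettings. Assumes this agent
        # was constructed with an llm= instance.
        async with self.llm.chat(
            chat_ctx=chat_ctx,
            tools=tools,
            tool_choice=model_settings.tool_choice,
            response_format=MY_FORMAT,
        ) as stream:
            async for chunk in stream:
                yield chunk

With an override like this, response_format never needs to appear on ModelSettings at all.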

"""The response format to use when calling the LLM."""


class Agent:
@@ -409,11 +411,16 @@ async def llm_node(
)

tool_choice = model_settings.tool_choice if model_settings else NOT_GIVEN
response_format = model_settings.response_format if model_settings else NOT_GIVEN
activity_llm = activity.llm

conn_options = activity.session.conn_options.llm_conn_options
async with activity_llm.chat(
chat_ctx=chat_ctx, tools=tools, tool_choice=tool_choice, conn_options=conn_options
chat_ctx=chat_ctx,
tools=tools,
tool_choice=tool_choice,
response_format=response_format,
conn_options=conn_options,
) as stream:
async for chunk in stream:
yield chunk
Original file line number Diff line number Diff line change
@@ -130,6 +130,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream:
extra = {}
Original file line number Diff line number Diff line change
@@ -131,6 +131,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
temperature: NotGivenOr[float] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream:
opts: dict[str, Any] = {}
Original file line number Diff line number Diff line change
@@ -64,6 +64,7 @@ def chat(
# these are unused, since tool execution takes place in langgraph
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LangGraphStream[ContextT]:
return LangGraphStream(
livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py
@@ -119,6 +119,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
Comment on lines +122 to 123
@coderabbitai coderabbitai bot (Contributor), Jan 19, 2026:

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

OpenAI Responses API response_format parameter structure 2025

💡 Result:

Short answer — structure and examples (2025)

  • Two main modes: JSON mode and Structured Outputs (schema-enforced). Use JSON mode with type "json_object"; use Structured Outputs with type "json_schema" and include a JSON Schema under "json_schema". [1][2]

  • Current Responses API placement (2025): put the format inside the text field:
  Example (schema-enforced):

    {
      "text": {
        "format": {
          "type": "json_schema",
          "json_schema": {
            "type": "object",
            "properties": {
              "name": {"type": "string"},
              "age": {"type": "integer"}
            },
            "required": ["name", "age"],
            "additionalProperties": false
          }
        }
      }
    }

  Example (JSON mode):

    {
      "text": {
        "format": { "type": "json_object" }
      }
    }
    (The older parameter name response_format is still seen in examples but the Responses API places format under text.format). [1][2][3]

Sources

  • OpenAI Structured Outputs / JSON mode docs. [1]
  • OpenAI announcement + SDK examples for Structured Outputs. [2]
  • 2025 technical deep-dive showing text.format usage and examples. [3]
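
To make the text.format placement concrete, a small hedged sketch using the openai Python SDK (1.x); the model name and prompt are placeholders, and JSON mode is shown since it needs no schema:

from openai import OpenAI

client = OpenAI()

# JSON mode via the Responses API: the format object sits under text.format,
# not under a top-level response_format parameter.
resp = client.responses.create(
    model="gpt-4o-mini",  # placeholder model
    input="Describe today's weather as a JSON object.",
    text={"format": {"type": "json_object"}},
)
print(resp.output_text)  # a JSON string, since JSON mode was requested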

🏁 Script executed:

# First, let's check the actual file to understand the context
fd -type f -name "llm.py" | grep -i openai | head -5


Length of output: 228


🏁 Script executed:

# Find the llm.py file in openai responses directory
find . -path "*/openai/responses/llm.py" -type f


Length of output: 138


🏁 Script executed:

# Also check the directory structure
fd "llm.py" | grep openai


Length of output: 205


🏁 Script executed:

# Read the llm.py file to examine the response_format parameter and API call
cat -n ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py | sed -n '110,170p'


Length of output: 2576


🏁 Script executed:

# Search for all usages of response_format in the openai plugin
rg "response_format" livekit-plugins/livekit-plugins-openai/


Length of output: 1615


🏁 Script executed:

# Check LLMStream class definition to see if it handles response_format
cat -n ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py | sed -n '170,250p'


Length of output: 3694


🏁 Script executed:

# Check the regular llm.py file to see response_format handling
cat -n ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/llm.py | sed -n '1,50p'


Length of output: 1868


🏁 Script executed:

# Search for the chat() method in llm.py to see how response_format is used
rg -A 30 "def chat" ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/llm.py | head -80


Length of output: 1173


🏁 Script executed:

# Check llm_utils to understand to_openai_response_format
fd "llm_utils" livekit-plugins/livekit-plugins-openai/


Length of output: 40


🏁 Script executed:

# Find where llm_utils is located
fd -name "*.py" | xargs grep -l "def to_openai_response_format" | head -3


Length of output: 288


🏁 Script executed:

# Check how response_format is used in the regular llm.py
rg -B 5 -A 10 "extra\[\"response_format\"\]" ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/llm.py


Length of output: 684


🏁 Script executed:

# Check if there are any tests or examples showing how response_format is used
rg -B 5 -A 5 "response_format" ./livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/ | head -40


Length of output: 1398


🏁 Script executed:

# Find and examine the llm_utils to_openai_response_format function
find . -path "*/agents/llm/utils.py" -o -path "*/livekit/agents/llm/utils.py" | xargs cat 2>/dev/null | head -200


Length of output: 6557


🏁 Script executed:

# Or search more broadly
rg -B 2 -A 15 "def to_openai_response_format" --type py


Length of output: 1403


🏁 Script executed:

# Let's verify the exact structure of response_format for Responses API
# by checking if there are any tests or examples
rg -r "responses.create" ./livekit-plugins/livekit-plugins-openai/ --type py -A 10 | head -50


Length of output: 40


🏁 Script executed:

# Check the full to_openai_response_format function to understand the structure better
rg -B 5 -A 25 "def to_openai_response_format" livekit-agents/


Length of output: 2378


The response_format parameter is accepted but not utilized in the API call.

OpenAI's Responses API supports response_format for structured output control. The parameter is defined in the method signature but never propagated to the API request, unlike parallel_tool_calls and tool_choice.

🔧 Suggested fix to propagate response_format
         if is_given(tool_choice):
             oai_tool_choice: response_create_params.ToolChoice
             if isinstance(tool_choice, dict):
                 oai_tool_choice = {
                     "type": "function",
                     "name": tool_choice["function"]["name"],
                 }
                 extra["tool_choice"] = oai_tool_choice
             elif tool_choice in ("auto", "required", "none"):
                 oai_tool_choice = tool_choice  # type: ignore
                 extra["tool_choice"] = oai_tool_choice
 
+        if is_given(response_format):
+            extra["text"] = {"format": response_format}
+
         return LLMStream(
🤖 Prompt for AI Agents
In
`@livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py`
around lines 122 - 123, The response_format parameter is accepted by the method
but never forwarded to the OpenAI Responses API; update the Responses API
invocation (the same call that already forwards parallel_tool_calls and
tool_choice) to include response_format when it is not NOT_GIVEN: check for
NOT_GIVEN and, if a real value, add response_format=response_format to the API
call arguments so the Responses API receives the structured output directive.
Ensure you reference the existing NOT_GIVEN sentinel and the response_format
parameter in your change.

vishal-seshagiri-infinitusai:
Since the goal of this PR is to make it work only for google, leaving this as a passthrough.

coderabbitai bot (Contributor):
@vishal-seshagiri-infinitusai, understood! Since the goal is to implement response_format for Google only, leaving it as a passthrough for OpenAI Responses makes sense for this PR. The parameter maintains interface consistency across providers while keeping the implementation scoped appropriately.


✏️ Learnings added
Learnt from: vishal-seshagiri-infinitusai
Repo: livekit/agents PR: 4559
File: livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py:122-123
Timestamp: 2026-01-19T23:21:47.799Z
Learning: In PR `#4559`, response_format was intentionally added as a passthrough (accepted but not utilized) to the OpenAI Responses API in `livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py`. The implementation is scoped only for Google provider, not for OpenAI.



🧠 Learnings used
Learnt from: davidzhao
Repo: livekit/agents PR: 4548
File: livekit-plugins/livekit-plugins-openai/livekit/plugins/openai/responses/llm.py:77-81
Timestamp: 2026-01-18T01:08:55.648Z
Learning: In the OpenAI responses LLM (`livekit-plugins-openai/livekit/plugins/openai/responses/llm.py`), reasoning effort defaults are intentionally set lower than OpenAI's API defaults for voice interactions: "none" for gpt-5.1/gpt-5.2 and "minimal" for other reasoning-capable models like gpt-5, to avoid enabling reasoning by default in voice contexts.

) -> LLMStream:
extra = {}
1 change: 1 addition & 0 deletions tests/fake_llm.py
@@ -62,6 +62,7 @@ def chat(
conn_options: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
response_format: NotGivenOr[Any] = NOT_GIVEN,
extra_kwargs: NotGivenOr[dict[str, Any]] = NOT_GIVEN,
) -> LLMStream:
return FakeLLMStream(self, chat_ctx=chat_ctx, tools=tools or [], conn_options=conn_options)