55 changes: 55 additions & 0 deletions docs/models/openrouter.md
@@ -73,3 +73,58 @@ model = OpenRouterModel('openai/gpt-5')
agent = Agent(model, model_settings=settings)
...
```

## Image Generation
**Collaborator** commented:
We should also make this work with builtin_tools=[ImageGenerationTool()] and document it here: https://ai.pydantic.dev/builtin-tools/#image-generation-tool

As with Google, which doesn't expose that as a tool, using that tool or BinaryImage in output_type should automatically enable the modality.
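
As a rough illustration of the usage that comment describes — a minimal sketch assuming the automatic-modality behavior it proposes, rather than the `openrouter_modalities` setting documented below:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.builtin_tools import ImageGenerationTool

# Hypothetical per the comment: passing the builtin ImageGenerationTool (or using
# BinaryImage in output_type) would enable the image modality automatically,
# without setting openrouter_modalities explicitly.
agent = Agent(
    'openrouter:google/gemini-2.5-flash-image-preview',
    builtin_tools=[ImageGenerationTool()],
    output_type=str | BinaryImage,
)

result = agent.run_sync('A cat')
if isinstance(result.output, BinaryImage):
    print(result.output.media_type)
```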


You can enable image generation on OpenRouter models that support it by using the `openrouter_modalities` setting:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
model='openrouter:google/gemini-2.5-flash-image-preview',
output_type=str | BinaryImage,
model_settings=OpenRouterModelSettings(openrouter_modalities=['image', 'text']),
)

result = agent.run_sync('A cat')
assert isinstance(result.output, BinaryImage)
```

You can further customize image generation using `openrouter_image_config`:

```python
from pydantic_ai.models.openrouter import OpenRouterModelSettings

settings = OpenRouterModelSettings(
openrouter_modalities=['image', 'text'],
openrouter_image_config={'aspect_ratio': '3:2'}
)
```

**Collaborator** commented on `openrouter_image_config`:

I want this to be an option on ImageGenerationTool as in #3412.

**@DouweM** (Collaborator) replied on Dec 1, 2025:

I'm OK with it also being a model setting if it supports more keys than ImageGenerationTool does currently (or really: after that PR), but if this is the only field supported so far I'd prefer to leave that and the modality settings off.

If you want, you can finish that PR as we're at it to make your life here easier.

**Contributor (Author)** replied:

Yep! #3672

> Available aspect ratios: `'1:1'`, `'2:3'`, `'3:2'`, `'3:4'`, `'4:3'`, `'4:5'`, `'5:4'`, `'9:16'`, `'16:9'`, `'21:9'`.
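
A rough sketch of what the thread above proposes, treating `aspect_ratio` as a hypothetical `ImageGenerationTool` option rather than an existing parameter (see #3412 and #3672):

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.builtin_tools import ImageGenerationTool

# Hypothetical API, pending #3412 / #3672: aspect_ratio as an option on
# ImageGenerationTool instead of (or alongside) the provider-specific
# openrouter_image_config model setting.
agent = Agent(
    'openrouter:google/gemini-2.5-flash-image-preview',
    builtin_tools=[ImageGenerationTool(aspect_ratio='3:2')],
    output_type=str | BinaryImage,
)
```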

Image generation also works with streaming:

```python {test="skip"}
from pydantic_ai import Agent, BinaryImage
from pydantic_ai.models.openrouter import OpenRouterModelSettings

agent = Agent(
model='openrouter:google/gemini-2.5-flash-image-preview',
output_type=str | BinaryImage,
model_settings=OpenRouterModelSettings(
openrouter_modalities=['image', 'text'],
openrouter_image_config={'aspect_ratio': '3:2'},
),
)

response = agent.run_stream_sync('A dog')
for output in response.stream_output():
if isinstance(output, str):
print(output)
elif isinstance(output, BinaryImage):
# Handle the generated image
print(f'Generated image: {output.media_type}')
```
76 changes: 39 additions & 37 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -601,28 +601,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
raise UnexpectedModelBehavior(f'Invalid response from {self.system} chat completions endpoint: {e}') from e

choice = response.choices[0]
items: list[ModelResponsePart] = []

if thinking_parts := self._process_thinking(choice.message):
items.extend(thinking_parts)

if choice.message.content:
items.extend(
(replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part)
for part in split_content_into_text_and_thinking(choice.message.content, self.profile.thinking_tags)
)
if choice.message.tool_calls is not None:
for c in choice.message.tool_calls:
if isinstance(c, ChatCompletionMessageFunctionToolCall):
part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
elif isinstance(c, ChatCompletionMessageCustomToolCall): # pragma: no cover
# NOTE: Custom tool calls are not supported.
# See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
raise RuntimeError('Custom tool calls are not supported')
else:
assert_never(c)
part.tool_call_id = _guard_tool_call_id(part)
items.append(part)
items = list(self._process_parts(choice.message))

return ModelResponse(
parts=items,
@@ -635,33 +614,56 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelRespons
finish_reason=self._map_finish_reason(choice.finish_reason),
)

def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
"""Hook that maps reasoning tokens to thinking parts.

This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
"""
profile = OpenAIModelProfile.from_profile(self.profile)
custom_field = profile.openai_chat_thinking_field
items: list[ThinkingPart] = []

# Prefer the configured custom reasoning field, if present in profile.
# Fall back to built-in fields if no custom field result was found.

# The `reasoning_content` field is typically present in DeepSeek and Moonshot models.
# https://api-docs.deepseek.com/guides/reasoning_model

# The `reasoning` field is typically present in gpt-oss via Ollama and OpenRouter.
# - https://cookbook.openai.com/articles/gpt-oss/handle-raw-cot#chat-completions-api
# - https://openrouter.ai/docs/use-cases/reasoning-tokens#basic-usage-with-reasoning-tokens

for field_name in (custom_field, 'reasoning', 'reasoning_content'):
if not field_name:
continue
reasoning: str | None = getattr(message, field_name, None)
if reasoning: # pragma: no branch
items.append(ThinkingPart(id=field_name, content=reasoning, provider_name=self.system))
return items
yield ThinkingPart(id=field_name, content=reasoning, provider_name=self.system)

return items or None
def _process_content(self, message: chat.ChatCompletionMessage) -> Iterable[TextPart | ThinkingPart]:
"""Hook that maps the message content to thinking or text parts.

This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
"""
if message.content:
for part in split_content_into_text_and_thinking(message.content, self.profile.thinking_tags):
yield replace(part, id='content', provider_name=self.system) if isinstance(part, ThinkingPart) else part

def _process_tool_calls(self, message: chat.ChatCompletionMessage) -> Iterable[ToolCallPart]:
"""Hook that maps tool calls to tool call parts.

This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
"""
if message.tool_calls is not None:
for c in message.tool_calls:
if isinstance(c, ChatCompletionMessageFunctionToolCall):
part = ToolCallPart(c.function.name, c.function.arguments, tool_call_id=c.id)
elif isinstance(c, ChatCompletionMessageCustomToolCall): # pragma: no cover
# NOTE: Custom tool calls are not supported.
# See <https://github.com/pydantic/pydantic-ai/issues/2513> for more details.
raise RuntimeError('Custom tool calls are not supported')
else:
assert_never(c)
part.tool_call_id = _guard_tool_call_id(part)
yield part

def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
"""Hook that defines the mappings to transform message contents to response parts.

This method may be overridden by subclasses of `OpenAIChatModel` to apply custom mappings.
"""
return itertools.chain(
self._process_thinking(message), self._process_content(message), self._process_tool_calls(message)
)

async def _process_streamed_response(
self, response: AsyncStream[ChatCompletionChunk], model_request_parameters: ModelRequestParameters
109 changes: 104 additions & 5 deletions pydantic_ai_slim/pydantic_ai/models/openrouter.py
@@ -1,15 +1,22 @@
from __future__ import annotations as _annotations

import itertools
from collections.abc import Iterable
from dataclasses import dataclass, field
from typing import Annotated, Any, Literal, TypeAlias, cast

from pydantic import BaseModel, Discriminator
from typing_extensions import TypedDict, assert_never, override

from ..builtin_tools import ImageGenerationTool
from ..exceptions import ModelHTTPError
from ..messages import (
BinaryImage,
BuiltinToolCallPart,
BuiltinToolReturnPart,
FilePart,
FinishReason,
ModelResponsePart,
ModelResponseStreamEvent,
ThinkingPart,
)
@@ -197,6 +204,12 @@ class OpenRouterUsageConfig(TypedDict, total=False):
include: bool


class OpenRouterImageConfig(TypedDict, total=False):
"""Configuration for OpenRouter image generation."""

aspect_ratio: Literal['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9']


class OpenRouterModelSettings(ModelSettings, total=False):
"""Settings used for an OpenRouter model request."""

Expand Down Expand Up @@ -230,12 +243,24 @@ class OpenRouterModelSettings(ModelSettings, total=False):
The reasoning config object consolidates settings for controlling reasoning strength across different models. [See more](https://openrouter.ai/docs/use-cases/reasoning-tokens)
"""

openrouter_modalities: list[Literal['text', 'image']]
"""To control the modalities of the model.

The modalities config object consolidates settings for controlling the output modalities of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation)
"""

openrouter_usage: OpenRouterUsageConfig
"""To control the usage of the model.

The usage config object consolidates settings for enabling detailed usage information. [See more](https://openrouter.ai/docs/use-cases/usage-accounting)
"""

openrouter_image_config: OpenRouterImageConfig
"""To control the image generation of the model.

The image config object consolidates settings for controlling the image generation of the model. [See more](https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-aspect-ratio-configuration)
"""


class _OpenRouterError(BaseModel):
"""Utility class to validate error messages from OpenRouter."""
@@ -366,6 +391,14 @@ class _OpenRouterChatCompletionMessageFunctionToolCall(chat.ChatCompletionMessag
]


class _OpenRouterImageUrl(BaseModel):
url: str


class _OpenRouterImageGeneration(BaseModel):
image_url: _OpenRouterImageUrl


class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
"""Wrapped chat completion message with OpenRouter specific attributes."""

@@ -378,6 +411,9 @@ class _OpenRouterCompletionMessage(chat.ChatCompletionMessage):
tool_calls: list[_OpenRouterChatCompletionMessageToolCallUnion] | None = None # type: ignore[reportIncompatibleVariableOverride]
"""The tool calls generated by the model, such as function calls."""

images: list[_OpenRouterImageGeneration] | None = None
"""The images generated by the model, if any."""


class _OpenRouterChoice(chat_completion.Choice):
"""Wraps OpenAI chat completion choice with OpenRouter specific attributes."""
@@ -468,11 +504,14 @@ def _map_openrouter_provider_details(
return provider_details


def _openrouter_settings_to_openai_settings(model_settings: OpenRouterModelSettings) -> OpenAIChatModelSettings:
def _openrouter_settings_to_openai_settings(
model_settings: OpenRouterModelSettings, model_request_parameters: ModelRequestParameters
) -> OpenAIChatModelSettings:
"""Transforms a 'OpenRouterModelSettings' object into an 'OpenAIChatModelSettings' object.

Args:
model_settings: The 'OpenRouterModelSettings' object to transform.
model_request_parameters: The 'ModelRequestParameters' object to use for the transformation.

Returns:
An 'OpenAIChatModelSettings' object with equivalent settings.
@@ -491,6 +530,17 @@ def _openrouter_settings_to_openai_settings(model_settings: OpenRouterModelSetti
extra_body['reasoning'] = reasoning
if usage := model_settings.pop('openrouter_usage', None):
extra_body['usage'] = usage
if modalities := model_settings.pop('openrouter_modalities', None):
extra_body['modalities'] = modalities
if image_config := model_settings.pop('openrouter_image_config', None):
extra_body['image_config'] = image_config

for builtin_tool in model_request_parameters.builtin_tools:
if isinstance(builtin_tool, ImageGenerationTool):
extra_body['modalities'] = ['text', 'image']

if isinstance(model_request_parameters.output_object, BinaryImage):
extra_body['modalities'] = ['text', 'image']

model_settings['extra_body'] = extra_body

@@ -525,9 +575,16 @@ def prepare_request(
model_request_parameters: ModelRequestParameters,
) -> tuple[ModelSettings | None, ModelRequestParameters]:
merged_settings, customized_parameters = super().prepare_request(model_settings, model_request_parameters)
new_settings = _openrouter_settings_to_openai_settings(cast(OpenRouterModelSettings, merged_settings or {}))
new_settings = _openrouter_settings_to_openai_settings(
cast(OpenRouterModelSettings, merged_settings or {}), model_request_parameters
)
return new_settings, customized_parameters

@override
def _get_web_search_options(self, model_request_parameters: ModelRequestParameters):
"""This method is nullified because OpenRouter handles web search through a different parameter."""
return None

@override
def _validate_completion(self, response: chat.ChatCompletion) -> _OpenRouterChatCompletion:
response = _OpenRouterChatCompletion.model_validate(response.model_dump())
@@ -538,13 +595,27 @@ def _validate_completion(self, response: chat.ChatCompletion) -> _OpenRouterChat
return response

@override
def _process_thinking(self, message: chat.ChatCompletionMessage) -> list[ThinkingPart] | None:
def _process_thinking(self, message: chat.ChatCompletionMessage) -> Iterable[ThinkingPart]:
assert isinstance(message, _OpenRouterCompletionMessage)

if reasoning_details := message.reasoning_details:
return [_from_reasoning_detail(detail) for detail in reasoning_details]
for detail in reasoning_details:
yield _from_reasoning_detail(detail)
else:
return super()._process_thinking(message)
yield from super()._process_thinking(message)

def _process_image(self, message: chat.ChatCompletionMessage) -> Iterable[FilePart]:
assert isinstance(message, _OpenRouterCompletionMessage)

if images := message.images:
for image in images:
yield FilePart(
content=BinaryImage.from_data_uri(image.image_url.url),
)

@override
def _process_parts(self, message: chat.ChatCompletionMessage) -> Iterable[ModelResponsePart]:
return itertools.chain(super()._process_parts(message), self._process_image(message))

@override
def _process_provider_details(self, response: chat.ChatCompletion) -> dict[str, Any]:
@@ -573,6 +644,12 @@ def _map_response_thinking_part(self, item: ThinkingPart) -> None:
else: # pragma: lax no cover
super()._map_response_thinking_part(item)

def _map_response_builtin_part(self, item: BuiltinToolCallPart | BuiltinToolReturnPart) -> None:
if isinstance(item, ImageGenerationTool):
pass

pass

@property
@override
def _streamed_response_cls(self):
@@ -594,6 +671,9 @@ class _OpenRouterChoiceDelta(chat_completion_chunk.ChoiceDelta):
reasoning_details: list[_OpenRouterReasoningDetail] | None = None
"""The reasoning details associated with the message, if any."""

images: list[_OpenRouterImageGeneration] | None = None
"""The images generated by the model, if any."""


class _OpenRouterChunkChoice(chat_completion_chunk.Choice):
"""Wraps OpenAI chat completion chunk choice with OpenRouter specific attributes."""
@@ -661,6 +741,25 @@ def _map_thinking_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[
else:
return super()._map_thinking_delta(choice)

def _map_file_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
assert isinstance(choice, _OpenRouterChunkChoice)

if images := choice.delta.images:
for image in images:
yield self._parts_manager.handle_part(
vendor_part_id=None,
part=FilePart(
content=BinaryImage.from_data_uri(image.image_url.url),
),
)

@override
def _map_part_delta(self, choice: chat_completion_chunk.Choice) -> Iterable[ModelResponseStreamEvent]:
return itertools.chain(
super()._map_part_delta(choice),
self._map_file_delta(choice),
)

@override
def _map_provider_details(self, chunk: chat.ChatCompletionChunk) -> dict[str, Any] | None:
assert isinstance(chunk, _OpenRouterChatCompletionChunk)