From 87ab524b6270fdf9ea31c261c6fb5598f365a78e Mon Sep 17 00:00:00 2001
From: Dave Page <dpage@pgadmin.org>
Date: Fri, 27 Mar 2026 09:16:00 +0000
Subject: [PATCH 1/3] Support /v1/responses for OpenAI models. #9795

---
 web/pgadmin/llm/providers/openai.py | 440 +++++++++++++++++++++++++---
 1 file changed, 397 insertions(+), 43 deletions(-)

diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py
index 2b0e2072917..2fb33e985e5 100644
--- a/web/pgadmin/llm/providers/openai.py
+++ b/web/pgadmin/llm/providers/openai.py
@@ -47,7 +47,9 @@ class OpenAIClient(LLMClient):
     OpenAI GPT API client.
 
     Implements the LLMClient interface for OpenAI's GPT models
-    and any OpenAI-compatible API endpoint.
+    and any OpenAI-compatible API endpoint. Supports both the
+    Chat Completions API (/v1/chat/completions) and the Responses
+    API (/v1/responses) for newer models that require it.
     """
 
     def __init__(self, api_key: Optional[str] = None,
@@ -66,8 +68,15 @@ def __init__(self, api_key: Optional[str] = None,
         """
         self._api_key = api_key or ''
         self._model = model or DEFAULT_MODEL
-        base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/')
-        self._api_url = f'{base_url}/chat/completions'
+        self._base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/')
+        self._use_responses_api = False
+
+    @property
+    def _api_url(self) -> str:
+        """Return the appropriate API endpoint URL."""
+        if self._use_responses_api:
+            return f'{self._base_url}/responses'
+        return f'{self._base_url}/chat/completions'
 
     @property
     def provider_name(self) -> str:
@@ -81,7 +90,9 @@ def is_available(self) -> bool:
         """Check if the client is properly configured."""
         # API key is required for the default OpenAI endpoint, but optional
         # for custom endpoints (e.g., local LLM servers).
-        if self._api_url.startswith(DEFAULT_API_BASE_URL):
+        if self._base_url.rstrip('/').startswith(
+            DEFAULT_API_BASE_URL.rstrip('/')
+        ):
             return bool(self._api_key)
         return True
 
@@ -109,10 +120,72 @@ def chat(
         Raises:
             LLMClientError: If the request fails.
         """
-        # Build the request payload
+        if self._use_responses_api:
+            return self._chat_responses(
+                messages, tools, system_prompt, max_tokens
+            )
+
+        # Try Chat Completions API first
+        payload = self._build_chat_payload(
+            messages, tools, system_prompt, max_tokens
+        )
+
+        try:
+            response_data = self._make_request(payload)
+            return self._parse_response(response_data)
+        except LLMClientError as e:
+            if self._should_use_responses_api(e):
+                self._use_responses_api = True
+                return self._chat_responses(
+                    messages, tools, system_prompt, max_tokens
+                )
+            raise
+        except Exception as e:
+            raise LLMClientError(LLMError(
+                message=f"Request failed: {str(e)}",
+                provider=self.provider_name
+            ))
+
+    def _chat_responses(
+        self,
+        messages: list[Message],
+        tools: Optional[list[Tool]] = None,
+        system_prompt: Optional[str] = None,
+        max_tokens: int = 4096,
+    ) -> LLMResponse:
+        """Send a chat request using the Responses API."""
+        payload = self._build_responses_payload(
+            messages, tools, system_prompt, max_tokens
+        )
+
+        try:
+            response_data = self._make_request(payload)
+            return self._parse_responses_response(response_data)
+        except LLMClientError:
+            raise
+        except Exception as e:
+            raise LLMClientError(LLMError(
+                message=f"Request failed: {str(e)}",
+                provider=self.provider_name
+            ))
+
+    def _should_use_responses_api(self, error: LLMClientError) -> bool:
+        """Check if the error indicates we should use the Responses API."""
+        error_msg = str(error).lower()
+        return ('v1/responses' in error_msg or
+                'not supported in the v1/chat/completions' in error_msg or
+                'not a chat model' in error_msg)
+
+    def _build_chat_payload(
+        self,
+        messages: list[Message],
+        tools: Optional[list[Tool]],
+        system_prompt: Optional[str],
+        max_tokens: int
+    ) -> dict:
+        """Build payload for the Chat Completions API."""
         converted_messages = self._convert_messages(messages)
 
-        # Add system prompt at the beginning if provided
         if system_prompt:
             converted_messages.insert(0, {
                 'role': 'system',
@@ -129,20 +202,35 @@ def chat(
             payload['tools'] = self._convert_tools(tools)
             payload['tool_choice'] = 'auto'
 
-        # Make the API request
-        try:
-            response_data = self._make_request(payload)
-            return self._parse_response(response_data)
-        except LLMClientError:
-            raise
-        except Exception as e:
-            raise LLMClientError(LLMError(
-                message=f"Request failed: {str(e)}",
-                provider=self.provider_name
-            ))
+        return payload
+
+    def _build_responses_payload(
+        self,
+        messages: list[Message],
+        tools: Optional[list[Tool]],
+        system_prompt: Optional[str],
+        max_tokens: int
+    ) -> dict:
+        """Build payload for the Responses API."""
+        input_items = self._convert_messages_responses(messages)
+
+        payload = {
+            'model': self._model,
+            'input': input_items,
+            'max_output_tokens': max_tokens,
+        }
+
+        if system_prompt:
+            payload['instructions'] = system_prompt
+
+        if tools:
+            payload['tools'] = self._convert_tools_responses(tools)
+            payload['tool_choice'] = 'auto'
+
+        return payload
 
     def _convert_messages(self, messages: list[Message]) -> list[dict]:
-        """Convert Message objects to OpenAI API format."""
+        """Convert Message objects to OpenAI Chat Completions API format."""
         result = []
 
         for msg in messages:
@@ -191,8 +279,53 @@ def _convert_messages(self, messages: list[Message]) -> list[dict]:
 
         return result
 
+    def _convert_messages_responses(
+        self, messages: list[Message]
+    ) -> list[dict]:
+        """Convert Message objects to OpenAI Responses API format."""
+        result = []
+
+        for msg in messages:
+            if msg.role == Role.SYSTEM:
+                result.append({
+                    'role': 'developer',
+                    'content': msg.content
+                })
+
+            elif msg.role == Role.USER:
+                result.append({
+                    'role': 'user',
+                    'content': msg.content
+                })
+
+            elif msg.role == Role.ASSISTANT:
+                if msg.content:
+                    result.append({
+                        'role': 'assistant',
+                        'content': msg.content
+                    })
+                # Tool calls are separate items in Responses API
+                if msg.tool_calls:
+                    for tc in msg.tool_calls:
+                        result.append({
+                            'type': 'function_call',
+                            'call_id': tc.id,
+                            'name': tc.name,
+                            'arguments': json.dumps(tc.arguments)
+                        })
+
+            elif msg.role == Role.TOOL:
+                for tr in msg.tool_results:
+                    result.append({
+                        'type': 'function_call_output',
+                        'call_id': tr.tool_call_id,
+                        'output': tr.content
+                    })
+
+        return result
+
     def _convert_tools(self, tools: list[Tool]) -> list[dict]:
-        """Convert Tool objects to OpenAI API format."""
+        """Convert Tool objects to Chat Completions API format."""
         return [
             {
                 'type': 'function',
@@ -205,6 +338,18 @@ def _convert_tools(self, tools: list[Tool]) -> list[dict]:
             for tool in tools
         ]
 
+    def _convert_tools_responses(self, tools: list[Tool]) -> list[dict]:
+        """Convert Tool objects to Responses API format."""
+        return [
+            {
+                'type': 'function',
+                'name': tool.name,
+                'description': tool.description,
+                'parameters': tool.parameters
+            }
+            for tool in tools
+        ]
+
     def _make_request(self, payload: dict) -> dict:
         """Make an HTTP request to the OpenAI API."""
         headers = {
@@ -256,7 +401,7 @@ def _make_request(self, payload: dict) -> dict:
             ))
 
     def _parse_response(self, data: dict) -> LLMResponse:
-        """Parse the OpenAI API response into an LLMResponse."""
+        """Parse the Chat Completions API response into an LLMResponse."""
         # Check for API-level errors in the response
         if 'error' in data:
             error_info = data['error']
@@ -357,6 +502,90 @@ def _parse_response(self, data: dict) -> LLMResponse:
             raw_response=data
         )
 
+    def _parse_responses_response(self, data: dict) -> LLMResponse:
+        """Parse the Responses API response into an LLMResponse."""
+        # Check for API-level errors
+        if 'error' in data:
+            error_info = data['error']
+            raise LLMClientError(LLMError(
+                message=error_info.get('message', 'Unknown API error'),
+                code=error_info.get('code', 'unknown'),
+                provider=self.provider_name,
+                retryable=False
+            ))
+
+        output = data.get('output', [])
+        content = ''
+        tool_calls = []
+
+        for item in output:
+            item_type = item.get('type', '')
+
+            if item_type == 'message':
+                for part in item.get('content', []):
+                    if part.get('type') == 'output_text':
+                        content += part.get('text', '')
+
+            elif item_type == 'function_call':
+                try:
+                    arguments = json.loads(
+                        item.get('arguments', '{}')
+                    )
+                except json.JSONDecodeError:
+                    arguments = {}
+
+                tool_calls.append(ToolCall(
+                    id=item.get('call_id', str(uuid.uuid4())),
+                    name=item.get('name', ''),
+                    arguments=arguments
+                ))
+
+        # Determine stop reason
+        status = data.get('status', '')
+        if tool_calls:
+            stop_reason = StopReason.TOOL_USE
+        elif status == 'completed':
+            stop_reason = StopReason.END_TURN
+        elif status == 'incomplete':
+            stop_reason = StopReason.MAX_TOKENS
+        else:
+            stop_reason = StopReason.UNKNOWN
+
+        # Parse usage information
+        usage_data = data.get('usage', {})
+        usage = Usage(
+            input_tokens=usage_data.get('input_tokens', 0),
+            output_tokens=usage_data.get('output_tokens', 0),
+            total_tokens=usage_data.get('total_tokens', 0)
+        )
+
+        # Check for problematic responses
+        if not content and not tool_calls:
+            if stop_reason == StopReason.MAX_TOKENS:
+                input_tokens = usage.input_tokens
+                raise LLMClientError(LLMError(
+                    message=f'Response truncated due to token limit '
+                            f'(input: {input_tokens} tokens). '
+                            f'The request is too large for model '
+                            f'{self._model}. '
+                            f'Try using a model with a larger context '
+                            f'window, or analyze a smaller scope (e.g., a '
+                            f'specific schema instead of the entire '
+                            f'database).',
+                    code='max_tokens',
+                    provider=self.provider_name,
+                    retryable=False
+                ))
+
+        return LLMResponse(
+            content=content,
+            tool_calls=tool_calls,
+            stop_reason=stop_reason,
+            model=data.get('model', self._model),
+            usage=usage,
+            raw_response=data
+        )
+
     def chat_stream(
         self,
         messages: list[Message],
@@ -367,30 +596,41 @@ def chat_stream(
         **kwargs
     ) -> Generator[Union[str, LLMResponse], None, None]:
         """Stream a chat response from OpenAI."""
-        converted_messages = self._convert_messages(messages)
-
-        if system_prompt:
-            converted_messages.insert(0, {
-                'role': 'system',
-                'content': system_prompt
-            })
-
-        payload = {
-            'model': self._model,
-            'messages': converted_messages,
-            'max_completion_tokens': max_tokens,
-            'stream': True,
-            'stream_options': {'include_usage': True}
-        }
+        if self._use_responses_api:
+            payload = self._build_responses_payload(
+                messages, tools, system_prompt, max_tokens
+            )
+            payload['stream'] = True
+            try:
+                yield from self._process_stream(payload)
+            except LLMClientError:
+                raise
+            except Exception as e:
+                raise LLMClientError(LLMError(
+                    message=f"Streaming request failed: {str(e)}",
+                    provider=self.provider_name
+                ))
+            return
 
-        if tools:
-            payload['tools'] = self._convert_tools(tools)
-            payload['tool_choice'] = 'auto'
+        # Try Chat Completions API first
+        payload = self._build_chat_payload(
+            messages, tools, system_prompt, max_tokens
+        )
+        payload['stream'] = True
+        payload['stream_options'] = {'include_usage': True}
 
         try:
             yield from self._process_stream(payload)
-        except LLMClientError:
-            raise
+        except LLMClientError as e:
+            if self._should_use_responses_api(e):
+                self._use_responses_api = True
+                payload = self._build_responses_payload(
+                    messages, tools, system_prompt, max_tokens
+                )
+                payload['stream'] = True
+                yield from self._process_stream(payload)
+            else:
+                raise
         except Exception as e:
             raise LLMClientError(LLMError(
                 message=f"Streaming request failed: {str(e)}",
@@ -449,16 +689,19 @@ def _process_stream(
             ))
 
         try:
-            yield from self._read_openai_stream(response)
+            if self._use_responses_api:
+                yield from self._read_responses_stream(response)
+            else:
+                yield from self._read_openai_stream(response)
         finally:
             response.close()
 
     def _read_openai_stream(
         self, response
     ) -> Generator[Union[str, LLMResponse], None, None]:
-        """Read and parse an OpenAI-format SSE stream.
+        """Read and parse an OpenAI Chat Completions SSE stream.
 
-        Uses readline() for incremental reading — it returns as soon
+        Uses readline() for incremental reading -- it returns as soon
         as a complete line arrives from the server, unlike read()
         which blocks until a buffer fills up.
         """
@@ -574,3 +817,114 @@ def _read_openai_stream(
             model=model_name,
             usage=usage
         )
+
+    def _read_responses_stream(
+        self, response
+    ) -> Generator[Union[str, LLMResponse], None, None]:
+        """Read and parse an OpenAI Responses API SSE stream.
+
+        The Responses API uses named events with types like
+        response.output_text.delta for text streaming and
+        response.completed for the final response.
+        """
+        content_parts = []
+        # tool_calls_data: {call_id: {name, arguments}}
+        tool_calls_data = {}
+        model_name = self._model
+        usage = Usage()
+
+        while True:
+            line_bytes = response.readline()
+            if not line_bytes:
+                break
+
+            line = line_bytes.decode('utf-8', errors='replace').strip()
+
+            if not line or line.startswith(':'):
+                continue
+
+            # Skip event type lines - we identify events by data type field
+            if line.startswith('event: '):
+                continue
+
+            if not line.startswith('data: '):
+                continue
+
+            try:
+                data = json.loads(line[6:])
+            except json.JSONDecodeError:
+                continue
+
+            event_type = data.get('type', '')
+
+            if event_type == 'response.output_text.delta':
+                delta = data.get('delta', '')
+                if delta:
+                    content_parts.append(delta)
+                    yield delta
+
+            elif event_type == 'response.output_item.added':
+                item = data.get('item', {})
+                if item.get('type') == 'function_call':
+                    call_id = item.get('call_id', '')
+                    tool_calls_data[call_id] = {
+                        'name': item.get('name', ''),
+                        'arguments': ''
+                    }
+
+            elif event_type == 'response.function_call_arguments.delta':
+                call_id = data.get('call_id', '')
+                if call_id not in tool_calls_data:
+                    tool_calls_data[call_id] = {
+                        'name': '', 'arguments': ''
+                    }
+                tool_calls_data[call_id]['arguments'] += data.get(
+                    'delta', ''
+                )
+
+            elif event_type == 'response.completed':
+                resp = data.get('response', {})
+                u = resp.get('usage', {})
+                usage = Usage(
+                    input_tokens=u.get('input_tokens', 0),
+                    output_tokens=u.get('output_tokens', 0),
+                    total_tokens=u.get('total_tokens', 0)
+                )
+                model_name = resp.get('model', model_name)
+
+        # Build final response
+        content = ''.join(content_parts)
+        tool_calls = []
+        for call_id, tc in tool_calls_data.items():
+            try:
+                arguments = json.loads(tc['arguments']) \
+                    if tc['arguments'] else {}
+            except json.JSONDecodeError:
+                arguments = {}
+            tool_calls.append(ToolCall(
+                id=call_id or str(uuid.uuid4()),
+                name=tc['name'],
+                arguments=arguments
+            ))
+
+        if tool_calls:
+            stop_reason = StopReason.TOOL_USE
+        elif content:
+            stop_reason = StopReason.END_TURN
+        else:
+            stop_reason = StopReason.UNKNOWN
+
+        if not content and not tool_calls:
+            raise LLMClientError(LLMError(
+                message='No response content returned from API',
+                provider=self.provider_name,
+                retryable=False
+            ))
+
+        yield LLMResponse(
+            content=content,
+            tool_calls=tool_calls,
+            stop_reason=stop_reason,
+            model=model_name,
+            usage=usage
+        )

From 4959c752bc8e53dddad8eb6cf3c9b3ac6bfef38a Mon Sep 17 00:00:00 2001
From: Dave Page <dpage@pgadmin.org>
Date: Fri, 27 Mar 2026 10:30:19 +0000
Subject: [PATCH 2/3] Address CodeRabbit review feedback on OpenAI provider.

- Preserve exception chains with 'raise ... from e' in all
  exception handlers for better debugging tracebacks.
- Use f-string !s conversion instead of str() calls.
- Extract duplicated max_tokens error handling into a shared
  _raise_max_tokens_error() helper method.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 web/pgadmin/llm/providers/openai.py | 62 ++++++++++++-----------------
 1 file changed, 26 insertions(+), 36 deletions(-)

diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py
index 2fb33e985e5..42e4f091437 100644
--- a/web/pgadmin/llm/providers/openai.py
+++ b/web/pgadmin/llm/providers/openai.py
@@ -142,9 +142,9 @@ def chat(
             raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Request failed: {str(e)}",
+                message=f"Request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e
 
     def _chat_responses(
         self,
@@ -165,9 +165,9 @@ def _chat_responses(
             raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Request failed: {str(e)}",
+                message=f"Request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e
 
     def _should_use_responses_api(self, error: LLMClientError) -> bool:
         """Check if the error indicates we should use the Responses API."""
@@ -400,6 +400,22 @@ def _make_request(self, payload: dict) -> dict:
                 retryable=True
             ))
 
+    def _raise_max_tokens_error(self, input_tokens: int):
+        """Raise an error when a response is truncated due to token limit."""
+        raise LLMClientError(LLMError(
+            message=f'Response truncated due to token limit '
+                    f'(input: {input_tokens} tokens). '
+                    f'The request is too large for model '
+                    f'{self._model}. '
+                    f'Try using a model with a larger context '
+                    f'window, or analyze a smaller scope (e.g., a '
+                    f'specific schema instead of the entire '
+                    f'database).',
+            code='max_tokens',
+            provider=self.provider_name,
+            retryable=False
+        ))
+
     def _parse_response(self, data: dict) -> LLMResponse:
         """Parse the Chat Completions API response into an LLMResponse."""
         # Check for API-level errors in the response
@@ -470,20 +486,7 @@ def _parse_response(self, data: dict) -> LLMResponse:
         # Check for problematic responses
         if not content and not tool_calls:
             if stop_reason == StopReason.MAX_TOKENS:
-                input_tokens = usage.input_tokens
-                raise LLMClientError(LLMError(
-                    message=f'Response truncated due to token limit '
-                            f'(input: {input_tokens} tokens). '
-                            f'The request is too large for model '
-                            f'{self._model}. '
-                            f'Try using a model with a larger context '
-                            f'window, or analyze a smaller scope (e.g., a '
-                            f'specific schema instead of the entire '
-                            f'database).',
-                    code='max_tokens',
-                    provider=self.provider_name,
-                    retryable=False
-                ))
+                self._raise_max_tokens_error(usage.input_tokens)
             elif finish_reason and finish_reason not in ('stop', 'tool_calls'):
                 raise LLMClientError(LLMError(
                     message=(f'Empty response with finish reason: '
@@ -562,20 +565,7 @@ def _parse_responses_response(self, data: dict) -> LLMResponse:
         # Check for problematic responses
         if not content and not tool_calls:
             if stop_reason == StopReason.MAX_TOKENS:
-                input_tokens = usage.input_tokens
-                raise LLMClientError(LLMError(
-                    message=f'Response truncated due to token limit '
-                            f'(input: {input_tokens} tokens). '
-                            f'The request is too large for model '
-                            f'{self._model}. '
-                            f'Try using a model with a larger context '
-                            f'window, or analyze a smaller scope (e.g., a '
-                            f'specific schema instead of the entire '
-                            f'database).',
-                    code='max_tokens',
-                    provider=self.provider_name,
-                    retryable=False
-                ))
+                self._raise_max_tokens_error(usage.input_tokens)
 
         return LLMResponse(
             content=content,
@@ -607,9 +597,9 @@ def chat_stream(
                 raise
             except Exception as e:
                 raise LLMClientError(LLMError(
-                    message=f"Streaming request failed: {str(e)}",
+                    message=f"Streaming request failed: {e!s}",
                     provider=self.provider_name
-                ))
+                )) from e
             return
 
         # Try Chat Completions API first
@@ -633,9 +623,9 @@ def chat_stream(
                 raise
         except Exception as e:
             raise LLMClientError(LLMError(
-                message=f"Streaming request failed: {str(e)}",
+                message=f"Streaming request failed: {e!s}",
                 provider=self.provider_name
-            ))
+            )) from e
 
     def _process_stream(
         self, payload: dict

From eaa92612651a1d2070f27e50a733c879349c702c Mon Sep 17 00:00:00 2001
From: Dave Page <dpage@pgadmin.org>
Date: Fri, 27 Mar 2026 11:05:13 +0000
Subject: [PATCH 3/3] Validate api_url and use incomplete_details from
 Responses API.

- Strip known endpoint suffixes (/chat/completions, /responses) from
  api_url in __init__ to prevent doubled paths if a user provides a
  full endpoint URL instead of a base URL.
- Use incomplete_details.reason from the Responses API to properly
  distinguish between max_output_tokens and content_filter when the
  response status is 'incomplete', in both the non-streaming and
  streaming parsers.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 web/pgadmin/llm/providers/openai.py | 39 ++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py
index 42e4f091437..d455eced9fb 100644
--- a/web/pgadmin/llm/providers/openai.py
+++ b/web/pgadmin/llm/providers/openai.py
@@ -68,7 +68,13 @@ def __init__(self, api_key: Optional[str] = None,
         """
         self._api_key = api_key or ''
         self._model = model or DEFAULT_MODEL
-        self._base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/')
+        base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/')
+        # Strip known endpoint suffixes in case the user provided a full URL
+        for suffix in ('/chat/completions', '/responses'):
+            if base_url.endswith(suffix):
+                base_url = base_url[:-len(suffix)].rstrip('/')
+                break
+        self._base_url = base_url
         self._use_responses_api = False
 
     @property
@@ -543,14 +549,22 @@ def _parse_responses_response(self, data: dict) -> LLMResponse:
                     arguments=arguments
                 ))
 
-        # Determine stop reason
+        # Determine stop reason from status and incomplete_details
         status = data.get('status', '')
         if tool_calls:
             stop_reason = StopReason.TOOL_USE
         elif status == 'completed':
             stop_reason = StopReason.END_TURN
         elif status == 'incomplete':
-            stop_reason = StopReason.MAX_TOKENS
+            reason = data.get(
+                'incomplete_details', {}
+            ).get('reason', '')
+            if reason == 'content_filter':
+                stop_reason = StopReason.STOP_SEQUENCE
+            elif reason == 'max_output_tokens':
+                stop_reason = StopReason.MAX_TOKENS
+            else:
+                stop_reason = StopReason.MAX_TOKENS
         else:
             stop_reason = StopReason.UNKNOWN
 
@@ -566,6 +580,13 @@ def _parse_responses_response(self, data: dict) -> LLMResponse:
         if not content and not tool_calls:
             if stop_reason == StopReason.MAX_TOKENS:
                 self._raise_max_tokens_error(usage.input_tokens)
+            elif stop_reason == StopReason.STOP_SEQUENCE:
+                raise LLMClientError(LLMError(
+                    message='Response blocked by content filter.',
+                    code='content_filter',
+                    provider=self.provider_name,
+                    retryable=False
+                ))
 
         return LLMResponse(
             content=content,
@@ -822,6 +843,8 @@ def _read_responses_stream(
         tool_calls_data = {}
         model_name = self._model
         usage = Usage()
+        resp_status = ''
+        resp_incomplete = {}
 
         while True:
             line_bytes = response.readline()
@@ -881,6 +904,8 @@ def _read_responses_stream(
                     total_tokens=u.get('total_tokens', 0)
                 )
                 model_name = resp.get('model', model_name)
+                resp_status = resp.get('status', '')
+                resp_incomplete = resp.get('incomplete_details', {})
 
         # Build final response
         content = ''.join(content_parts)
@@ -897,8 +922,16 @@ def _read_responses_stream(
                 arguments=arguments
             ))
 
+        # Determine stop reason from final response status
         if tool_calls:
             stop_reason = StopReason.TOOL_USE
+        elif resp_status == 'incomplete':
+            reason = resp_incomplete.get('reason', '') \
+                if resp_incomplete else ''
+            if reason == 'content_filter':
+                stop_reason = StopReason.STOP_SEQUENCE
+            else:
+                stop_reason = StopReason.MAX_TOKENS
         elif content:
             stop_reason = StopReason.END_TURN
         else: