From 87ab524b6270fdf9ea31c261c6fb5598f365a78e Mon Sep 17 00:00:00 2001 From: Dave Page Date: Fri, 27 Mar 2026 09:16:00 +0000 Subject: [PATCH 1/3] Support /v1/responses for OpenAI models. #9795 --- web/pgadmin/llm/providers/openai.py | 440 +++++++++++++++++++++++++--- 1 file changed, 397 insertions(+), 43 deletions(-) diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py index 2b0e2072917..2fb33e985e5 100644 --- a/web/pgadmin/llm/providers/openai.py +++ b/web/pgadmin/llm/providers/openai.py @@ -47,7 +47,9 @@ class OpenAIClient(LLMClient): OpenAI GPT API client. Implements the LLMClient interface for OpenAI's GPT models - and any OpenAI-compatible API endpoint. + and any OpenAI-compatible API endpoint. Supports both the + Chat Completions API (/v1/chat/completions) and the Responses + API (/v1/responses) for newer models that require it. """ def __init__(self, api_key: Optional[str] = None, @@ -66,8 +68,15 @@ def __init__(self, api_key: Optional[str] = None, """ self._api_key = api_key or '' self._model = model or DEFAULT_MODEL - base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/') - self._api_url = f'{base_url}/chat/completions' + self._base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/') + self._use_responses_api = False + + @property + def _api_url(self) -> str: + """Return the appropriate API endpoint URL.""" + if self._use_responses_api: + return f'{self._base_url}/responses' + return f'{self._base_url}/chat/completions' @property def provider_name(self) -> str: @@ -81,7 +90,9 @@ def is_available(self) -> bool: """Check if the client is properly configured.""" # API key is required for the default OpenAI endpoint, but optional # for custom endpoints (e.g., local LLM servers). - if self._api_url.startswith(DEFAULT_API_BASE_URL): + if self._base_url.rstrip('/').startswith( + DEFAULT_API_BASE_URL.rstrip('/') + ): return bool(self._api_key) return True @@ -109,10 +120,72 @@ def chat( Raises: LLMClientError: If the request fails. """ - # Build the request payload + if self._use_responses_api: + return self._chat_responses( + messages, tools, system_prompt, max_tokens + ) + + # Try Chat Completions API first + payload = self._build_chat_payload( + messages, tools, system_prompt, max_tokens + ) + + try: + response_data = self._make_request(payload) + return self._parse_response(response_data) + except LLMClientError as e: + if self._should_use_responses_api(e): + self._use_responses_api = True + return self._chat_responses( + messages, tools, system_prompt, max_tokens + ) + raise + except Exception as e: + raise LLMClientError(LLMError( + message=f"Request failed: {str(e)}", + provider=self.provider_name + )) + + def _chat_responses( + self, + messages: list[Message], + tools: Optional[list[Tool]] = None, + system_prompt: Optional[str] = None, + max_tokens: int = 4096, + ) -> LLMResponse: + """Send a chat request using the Responses API.""" + payload = self._build_responses_payload( + messages, tools, system_prompt, max_tokens + ) + + try: + response_data = self._make_request(payload) + return self._parse_responses_response(response_data) + except LLMClientError: + raise + except Exception as e: + raise LLMClientError(LLMError( + message=f"Request failed: {str(e)}", + provider=self.provider_name + )) + + def _should_use_responses_api(self, error: LLMClientError) -> bool: + """Check if the error indicates we should use the Responses API.""" + error_msg = str(error).lower() + return ('v1/responses' in error_msg or + 'not supported in the v1/chat/completions' in error_msg or + 'not a chat model' in error_msg) + + def _build_chat_payload( + self, + messages: list[Message], + tools: Optional[list[Tool]], + system_prompt: Optional[str], + max_tokens: int + ) -> dict: + """Build payload for the Chat Completions API.""" converted_messages = self._convert_messages(messages) - # Add system prompt at the beginning if provided if system_prompt: converted_messages.insert(0, { 'role': 'system', @@ -129,20 +202,35 @@ def chat( payload['tools'] = self._convert_tools(tools) payload['tool_choice'] = 'auto' - # Make the API request - try: - response_data = self._make_request(payload) - return self._parse_response(response_data) - except LLMClientError: - raise - except Exception as e: - raise LLMClientError(LLMError( - message=f"Request failed: {str(e)}", - provider=self.provider_name - )) + return payload + + def _build_responses_payload( + self, + messages: list[Message], + tools: Optional[list[Tool]], + system_prompt: Optional[str], + max_tokens: int + ) -> dict: + """Build payload for the Responses API.""" + input_items = self._convert_messages_responses(messages) + + payload = { + 'model': self._model, + 'input': input_items, + 'max_output_tokens': max_tokens, + } + + if system_prompt: + payload['instructions'] = system_prompt + + if tools: + payload['tools'] = self._convert_tools_responses(tools) + payload['tool_choice'] = 'auto' + + return payload def _convert_messages(self, messages: list[Message]) -> list[dict]: - """Convert Message objects to OpenAI API format.""" + """Convert Message objects to OpenAI Chat Completions API format.""" result = [] for msg in messages: @@ -191,8 +279,53 @@ def _convert_messages(self, messages: list[Message]) -> list[dict]: return result + def _convert_messages_responses( + self, messages: list[Message] + ) -> list[dict]: + """Convert Message objects to OpenAI Responses API format.""" + result = [] + + for msg in messages: + if msg.role == Role.SYSTEM: + result.append({ + 'role': 'developer', + 'content': msg.content + }) + + elif msg.role == Role.USER: + result.append({ + 'role': 'user', + 'content': msg.content + }) + + elif msg.role == Role.ASSISTANT: + if msg.content: + result.append({ + 'role': 'assistant', + 'content': msg.content + }) + # Tool calls are separate items in Responses API + if msg.tool_calls: + for tc in msg.tool_calls: + result.append({ + 'type': 'function_call', + 'call_id': tc.id, + 'name': tc.name, + 'arguments': json.dumps(tc.arguments) + }) + + elif msg.role == Role.TOOL: + for tr in msg.tool_results: + result.append({ + 'type': 'function_call_output', + 'call_id': tr.tool_call_id, + 'output': tr.content + }) + + return result + def _convert_tools(self, tools: list[Tool]) -> list[dict]: - """Convert Tool objects to OpenAI API format.""" + """Convert Tool objects to Chat Completions API format.""" return [ { 'type': 'function', @@ -205,6 +338,18 @@ def _convert_tools(self, tools: list[Tool]) -> list[dict]: for tool in tools ] + def _convert_tools_responses(self, tools: list[Tool]) -> list[dict]: + """Convert Tool objects to Responses API format.""" + return [ + { + 'type': 'function', + 'name': tool.name, + 'description': tool.description, + 'parameters': tool.parameters + } + for tool in tools + ] + def _make_request(self, payload: dict) -> dict: """Make an HTTP request to the OpenAI API.""" headers = { @@ -256,7 +401,7 @@ def _make_request(self, payload: dict) -> dict: )) def _parse_response(self, data: dict) -> LLMResponse: - """Parse the OpenAI API response into an LLMResponse.""" + """Parse the Chat Completions API response into an LLMResponse.""" # Check for API-level errors in the response if 'error' in data: error_info = data['error'] @@ -357,6 +502,90 @@ def _parse_response(self, data: dict) -> LLMResponse: raw_response=data ) + def _parse_responses_response(self, data: dict) -> LLMResponse: + """Parse the Responses API response into an LLMResponse.""" + # Check for API-level errors + if 'error' in data: + error_info = data['error'] + raise LLMClientError(LLMError( + message=error_info.get('message', 'Unknown API error'), + code=error_info.get('code', 'unknown'), + provider=self.provider_name, + retryable=False + )) + + output = data.get('output', []) + content = '' + tool_calls = [] + + for item in output: + item_type = item.get('type', '') + + if item_type == 'message': + for part in item.get('content', []): + if part.get('type') == 'output_text': + content += part.get('text', '') + + elif item_type == 'function_call': + try: + arguments = json.loads( + item.get('arguments', '{}') + ) + except json.JSONDecodeError: + arguments = {} + + tool_calls.append(ToolCall( + id=item.get('call_id', str(uuid.uuid4())), + name=item.get('name', ''), + arguments=arguments + )) + + # Determine stop reason + status = data.get('status', '') + if tool_calls: + stop_reason = StopReason.TOOL_USE + elif status == 'completed': + stop_reason = StopReason.END_TURN + elif status == 'incomplete': + stop_reason = StopReason.MAX_TOKENS + else: + stop_reason = StopReason.UNKNOWN + + # Parse usage information + usage_data = data.get('usage', {}) + usage = Usage( + input_tokens=usage_data.get('input_tokens', 0), + output_tokens=usage_data.get('output_tokens', 0), + total_tokens=usage_data.get('total_tokens', 0) + ) + + # Check for problematic responses + if not content and not tool_calls: + if stop_reason == StopReason.MAX_TOKENS: + input_tokens = usage.input_tokens + raise LLMClientError(LLMError( + message=f'Response truncated due to token limit ' + f'(input: {input_tokens} tokens). ' + f'The request is too large for model ' + f'{self._model}. ' + f'Try using a model with a larger context ' + f'window, or analyze a smaller scope (e.g., a ' + f'specific schema instead of the entire ' + f'database).', + code='max_tokens', + provider=self.provider_name, + retryable=False + )) + + return LLMResponse( + content=content, + tool_calls=tool_calls, + stop_reason=stop_reason, + model=data.get('model', self._model), + usage=usage, + raw_response=data + ) + def chat_stream( self, messages: list[Message], @@ -367,30 +596,41 @@ def chat_stream( **kwargs ) -> Generator[Union[str, LLMResponse], None, None]: """Stream a chat response from OpenAI.""" - converted_messages = self._convert_messages(messages) - - if system_prompt: - converted_messages.insert(0, { - 'role': 'system', - 'content': system_prompt - }) - - payload = { - 'model': self._model, - 'messages': converted_messages, - 'max_completion_tokens': max_tokens, - 'stream': True, - 'stream_options': {'include_usage': True} - } + if self._use_responses_api: + payload = self._build_responses_payload( + messages, tools, system_prompt, max_tokens + ) + payload['stream'] = True + try: + yield from self._process_stream(payload) + except LLMClientError: + raise + except Exception as e: + raise LLMClientError(LLMError( + message=f"Streaming request failed: {str(e)}", + provider=self.provider_name + )) + return - if tools: - payload['tools'] = self._convert_tools(tools) - payload['tool_choice'] = 'auto' + # Try Chat Completions API first + payload = self._build_chat_payload( + messages, tools, system_prompt, max_tokens + ) + payload['stream'] = True + payload['stream_options'] = {'include_usage': True} try: yield from self._process_stream(payload) - except LLMClientError: - raise + except LLMClientError as e: + if self._should_use_responses_api(e): + self._use_responses_api = True + payload = self._build_responses_payload( + messages, tools, system_prompt, max_tokens + ) + payload['stream'] = True + yield from self._process_stream(payload) + else: + raise except Exception as e: raise LLMClientError(LLMError( message=f"Streaming request failed: {str(e)}", @@ -449,16 +689,19 @@ def _process_stream( )) try: - yield from self._read_openai_stream(response) + if self._use_responses_api: + yield from self._read_responses_stream(response) + else: + yield from self._read_openai_stream(response) finally: response.close() def _read_openai_stream( self, response ) -> Generator[Union[str, LLMResponse], None, None]: - """Read and parse an OpenAI-format SSE stream. + """Read and parse an OpenAI Chat Completions SSE stream. - Uses readline() for incremental reading — it returns as soon + Uses readline() for incremental reading -- it returns as soon as a complete line arrives from the server, unlike read() which blocks until a buffer fills up. """ @@ -574,3 +817,114 @@ def _read_openai_stream( model=model_name, usage=usage ) + + def _read_responses_stream( + self, response + ) -> Generator[Union[str, LLMResponse], None, None]: + """Read and parse an OpenAI Responses API SSE stream. + + The Responses API uses named events with types like + response.output_text.delta for text streaming and + response.completed for the final response. + """ + content_parts = [] + # tool_calls_data: {call_id: {name, arguments}} + tool_calls_data = {} + model_name = self._model + usage = Usage() + + while True: + line_bytes = response.readline() + if not line_bytes: + break + + line = line_bytes.decode('utf-8', errors='replace').strip() + + if not line or line.startswith(':'): + continue + + # Skip event type lines - we identify events by data type field + if line.startswith('event: '): + continue + + if not line.startswith('data: '): + continue + + try: + data = json.loads(line[6:]) + except json.JSONDecodeError: + continue + + event_type = data.get('type', '') + + if event_type == 'response.output_text.delta': + delta = data.get('delta', '') + if delta: + content_parts.append(delta) + yield delta + + elif event_type == 'response.output_item.added': + item = data.get('item', {}) + if item.get('type') == 'function_call': + call_id = item.get('call_id', '') + tool_calls_data[call_id] = { + 'name': item.get('name', ''), + 'arguments': '' + } + + elif event_type == 'response.function_call_arguments.delta': + call_id = data.get('call_id', '') + if call_id not in tool_calls_data: + tool_calls_data[call_id] = { + 'name': '', 'arguments': '' + } + tool_calls_data[call_id]['arguments'] += data.get( + 'delta', '' + ) + + elif event_type == 'response.completed': + resp = data.get('response', {}) + u = resp.get('usage', {}) + usage = Usage( + input_tokens=u.get('input_tokens', 0), + output_tokens=u.get('output_tokens', 0), + total_tokens=u.get('total_tokens', 0) + ) + model_name = resp.get('model', model_name) + + # Build final response + content = ''.join(content_parts) + tool_calls = [] + for call_id, tc in tool_calls_data.items(): + try: + arguments = json.loads(tc['arguments']) \ + if tc['arguments'] else {} + except json.JSONDecodeError: + arguments = {} + tool_calls.append(ToolCall( + id=call_id or str(uuid.uuid4()), + name=tc['name'], + arguments=arguments + )) + + if tool_calls: + stop_reason = StopReason.TOOL_USE + elif content: + stop_reason = StopReason.END_TURN + else: + stop_reason = StopReason.UNKNOWN + + if not content and not tool_calls: + raise LLMClientError(LLMError( + message='No response content returned from API', + provider=self.provider_name, + retryable=False + )) + + yield LLMResponse( + content=content, + tool_calls=tool_calls, + stop_reason=stop_reason, + model=model_name, + usage=usage + ) From 4959c752bc8e53dddad8eb6cf3c9b3ac6bfef38a Mon Sep 17 00:00:00 2001 From: Dave Page Date: Fri, 27 Mar 2026 10:30:19 +0000 Subject: [PATCH 2/3] Address CodeRabbit review feedback on OpenAI provider. - Preserve exception chains with 'raise ... from e' in all exception handlers for better debugging tracebacks. - Use f-string !s conversion instead of str() calls. - Extract duplicated max_tokens error handling into a shared _raise_max_tokens_error() helper method. Co-Authored-By: Claude Opus 4.6 (1M context) --- web/pgadmin/llm/providers/openai.py | 62 ++++++++++++----------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py index 2fb33e985e5..42e4f091437 100644 --- a/web/pgadmin/llm/providers/openai.py +++ b/web/pgadmin/llm/providers/openai.py @@ -142,9 +142,9 @@ def chat( raise except Exception as e: raise LLMClientError(LLMError( - message=f"Request failed: {str(e)}", + message=f"Request failed: {e!s}", provider=self.provider_name - )) + )) from e def _chat_responses( self, @@ -165,9 +165,9 @@ def _chat_responses( raise except Exception as e: raise LLMClientError(LLMError( - message=f"Request failed: {str(e)}", + message=f"Request failed: {e!s}", provider=self.provider_name - )) + )) from e def _should_use_responses_api(self, error: LLMClientError) -> bool: """Check if the error indicates we should use the Responses API.""" @@ -400,6 +400,22 @@ def _make_request(self, payload: dict) -> dict: retryable=True )) + def _raise_max_tokens_error(self, input_tokens: int): + """Raise an error when a response is truncated due to token limit.""" + raise LLMClientError(LLMError( + message=f'Response truncated due to token limit ' + f'(input: {input_tokens} tokens). ' + f'The request is too large for model ' + f'{self._model}. ' + f'Try using a model with a larger context ' + f'window, or analyze a smaller scope (e.g., a ' + f'specific schema instead of the entire ' + f'database).', + code='max_tokens', + provider=self.provider_name, + retryable=False + )) + def _parse_response(self, data: dict) -> LLMResponse: """Parse the Chat Completions API response into an LLMResponse.""" # Check for API-level errors in the response @@ -470,20 +486,7 @@ def _parse_response(self, data: dict) -> LLMResponse: # Check for problematic responses if not content and not tool_calls: if stop_reason == StopReason.MAX_TOKENS: - input_tokens = usage.input_tokens - raise LLMClientError(LLMError( - message=f'Response truncated due to token limit ' - f'(input: {input_tokens} tokens). ' - f'The request is too large for model ' - f'{self._model}. ' - f'Try using a model with a larger context ' - f'window, or analyze a smaller scope (e.g., a ' - f'specific schema instead of the entire ' - f'database).', - code='max_tokens', - provider=self.provider_name, - retryable=False - )) + self._raise_max_tokens_error(usage.input_tokens) elif finish_reason and finish_reason not in ('stop', 'tool_calls'): raise LLMClientError(LLMError( message=(f'Empty response with finish reason: ' @@ -562,20 +565,7 @@ def _parse_responses_response(self, data: dict) -> LLMResponse: # Check for problematic responses if not content and not tool_calls: if stop_reason == StopReason.MAX_TOKENS: - input_tokens = usage.input_tokens - raise LLMClientError(LLMError( - message=f'Response truncated due to token limit ' - f'(input: {input_tokens} tokens). ' - f'The request is too large for model ' - f'{self._model}. ' - f'Try using a model with a larger context ' - f'window, or analyze a smaller scope (e.g., a ' - f'specific schema instead of the entire ' - f'database).', - code='max_tokens', - provider=self.provider_name, - retryable=False - )) + self._raise_max_tokens_error(usage.input_tokens) return LLMResponse( content=content, @@ -607,9 +597,9 @@ def chat_stream( raise except Exception as e: raise LLMClientError(LLMError( - message=f"Streaming request failed: {str(e)}", + message=f"Streaming request failed: {e!s}", provider=self.provider_name - )) + )) from e return # Try Chat Completions API first @@ -633,9 +623,9 @@ def chat_stream( raise except Exception as e: raise LLMClientError(LLMError( - message=f"Streaming request failed: {str(e)}", + message=f"Streaming request failed: {e!s}", provider=self.provider_name - )) + )) from e def _process_stream( self, payload: dict From eaa92612651a1d2070f27e50a733c879349c702c Mon Sep 17 00:00:00 2001 From: Dave Page Date: Fri, 27 Mar 2026 11:05:13 +0000 Subject: [PATCH 3/3] Validate api_url and use incomplete_details from Responses API. - Strip known endpoint suffixes (/chat/completions, /responses) from api_url in __init__ to prevent doubled paths if a user provides a full endpoint URL instead of a base URL. - Use incomplete_details.reason from the Responses API to properly distinguish between max_output_tokens and content_filter when the response status is 'incomplete', in both the non-streaming and streaming parsers. Co-Authored-By: Claude Opus 4.6 (1M context) --- web/pgadmin/llm/providers/openai.py | 39 ++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/web/pgadmin/llm/providers/openai.py b/web/pgadmin/llm/providers/openai.py index 42e4f091437..d455eced9fb 100644 --- a/web/pgadmin/llm/providers/openai.py +++ b/web/pgadmin/llm/providers/openai.py @@ -68,7 +68,13 @@ def __init__(self, api_key: Optional[str] = None, """ self._api_key = api_key or '' self._model = model or DEFAULT_MODEL - self._base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/') + base_url = (api_url or DEFAULT_API_BASE_URL).rstrip('/') + # Strip known endpoint suffixes in case the user provided a full URL + for suffix in ('/chat/completions', '/responses'): + if base_url.endswith(suffix): + base_url = base_url[:-len(suffix)].rstrip('/') + break + self._base_url = base_url self._use_responses_api = False @property @@ -543,14 +549,22 @@ def _parse_responses_response(self, data: dict) -> LLMResponse: arguments=arguments )) - # Determine stop reason + # Determine stop reason from status and incomplete_details status = data.get('status', '') if tool_calls: stop_reason = StopReason.TOOL_USE elif status == 'completed': stop_reason = StopReason.END_TURN elif status == 'incomplete': - stop_reason = StopReason.MAX_TOKENS + reason = data.get( + 'incomplete_details', {} + ).get('reason', '') + if reason == 'content_filter': + stop_reason = StopReason.STOP_SEQUENCE + elif reason == 'max_output_tokens': + stop_reason = StopReason.MAX_TOKENS + else: + stop_reason = StopReason.MAX_TOKENS else: stop_reason = StopReason.UNKNOWN @@ -566,6 +580,13 @@ def _parse_responses_response(self, data: dict) -> LLMResponse: if not content and not tool_calls: if stop_reason == StopReason.MAX_TOKENS: self._raise_max_tokens_error(usage.input_tokens) + elif stop_reason == StopReason.STOP_SEQUENCE: + raise LLMClientError(LLMError( + message='Response blocked by content filter.', + code='content_filter', + provider=self.provider_name, + retryable=False + )) return LLMResponse( content=content, @@ -822,6 +843,8 @@ def _read_responses_stream( tool_calls_data = {} model_name = self._model usage = Usage() + resp_status = '' + resp_incomplete = {} while True: line_bytes = response.readline() @@ -881,6 +904,8 @@ def _read_responses_stream( total_tokens=u.get('total_tokens', 0) ) model_name = resp.get('model', model_name) + resp_status = resp.get('status', '') + resp_incomplete = resp.get('incomplete_details', {}) # Build final response content = ''.join(content_parts) @@ -897,8 +922,16 @@ def _read_responses_stream( arguments=arguments )) + # Determine stop reason from final response status if tool_calls: stop_reason = StopReason.TOOL_USE + elif resp_status == 'incomplete': + reason = resp_incomplete.get('reason', '') \ + if resp_incomplete else '' + if reason == 'content_filter': + stop_reason = StopReason.STOP_SEQUENCE + else: + stop_reason = StopReason.MAX_TOKENS elif content: stop_reason = StopReason.END_TURN else: