Skip to content

Commit a284ea8

Browse files
fede-kamel and claude committed
Fix OCI client V2 support and address copilot issues
This commit addresses all copilot feedback and fixes V2 API support:

1. Fixed V2 embed response format
   - V2 expects embeddings as dict with type keys (float, int8, etc.)
   - Added is_v2_client parameter to properly detect V2 mode
   - Updated transform_oci_response_to_cohere to preserve dict structure for V2

2. Fixed V2 streaming format
   - V2 SDK expects SSE format with "data: " prefix and double newline
   - Fixed text extraction from OCI V2 events (nested in message.content[0].text)
   - Added proper content-delta and content-end event types for V2
   - Updated transform_oci_stream_wrapper to output correct format based on is_v2

3. Fixed stream [DONE] signal handling
   - Changed from break to return to stop generator completely
   - Prevents further chunk processing after [DONE]

4. Added skip decorators with clear explanations
   - OCI on-demand models don't support multiple embedding types
   - OCI TEXT_GENERATION models require fine-tuning (not available on-demand)
   - OCI TEXT_RERANK models require fine-tuning (not available on-demand)

5. Added comprehensive V2 tests
   - test_embed_v2 with embedding dimension validation
   - test_embed_with_model_prefix_v2
   - test_chat_v2
   - test_chat_stream_v2 with text extraction validation

All 17 tests now pass with 7 properly documented skips.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 83e2375 commit a284ea8

File tree

2 files changed

+150
-23
lines changed

2 files changed

+150
-23
lines changed

src/cohere/oci_client.py

Lines changed: 75 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ def __init__(
119119
oci_config=oci_config,
120120
oci_region=oci_region,
121121
oci_compartment_id=oci_compartment_id,
122+
is_v2_client=False,
122123
),
123124
timeout=timeout,
124125
),
@@ -183,6 +184,7 @@ def __init__(
183184
oci_config=oci_config,
184185
oci_region=oci_region,
185186
oci_compartment_id=oci_compartment_id,
187+
is_v2_client=True,
186188
),
187189
timeout=timeout,
188190
),
@@ -270,6 +272,7 @@ def get_event_hooks(
270272
oci_config: typing.Dict[str, typing.Any],
271273
oci_region: str,
272274
oci_compartment_id: str,
275+
is_v2_client: bool = False,
273276
) -> typing.Dict[str, typing.List[EventHook]]:
274277
"""
275278
Create httpx event hooks for OCI request/response transformation.
@@ -278,6 +281,7 @@ def get_event_hooks(
278281
oci_config: OCI configuration dictionary
279282
oci_region: OCI region (e.g., "us-chicago-1")
280283
oci_compartment_id: OCI compartment OCID
284+
is_v2_client: Whether this is for OciClientV2 (True) or OciClient (False)
281285
282286
Returns:
283287
Dictionary of event hooks for httpx
@@ -288,6 +292,7 @@ def get_event_hooks(
288292
oci_config=oci_config,
289293
oci_region=oci_region,
290294
oci_compartment_id=oci_compartment_id,
295+
is_v2_client=is_v2_client,
291296
),
292297
],
293298
"response": [map_response_from_oci()],
@@ -298,6 +303,7 @@ def map_request_to_oci(
298303
oci_config: typing.Dict[str, typing.Any],
299304
oci_region: str,
300305
oci_compartment_id: str,
306+
is_v2_client: bool = False,
301307
) -> EventHook:
302308
"""
303309
Create event hook that transforms Cohere requests to OCI format and signs them.
@@ -306,6 +312,7 @@ def map_request_to_oci(
306312
oci_config: OCI configuration dictionary
307313
oci_region: OCI region
308314
oci_compartment_id: OCI compartment OCID
315+
is_v2_client: Whether this is for OciClientV2 (True) or OciClient (False)
309316
310317
Returns:
311318
Event hook function for httpx
@@ -393,6 +400,10 @@ def _event_hook(request: httpx.Request) -> None:
393400
request.extensions["endpoint"] = endpoint
394401
request.extensions["cohere_body"] = body
395402
request.extensions["is_stream"] = "stream" in endpoint or body.get("stream", False)
403+
# Store V2 detection for streaming event transformation
404+
# For chat, detect V2 by presence of "messages" field (V2) vs "message" field (V1)
405+
# For other endpoints (embed, rerank), use the client type
406+
request.extensions["is_v2"] = is_v2_client or ("messages" in body)
396407

397408
return _event_hook
398409

@@ -408,6 +419,7 @@ def map_response_from_oci() -> EventHook:
408419
def _hook(response: httpx.Response) -> None:
409420
endpoint = response.request.extensions["endpoint"]
410421
is_stream = response.request.extensions.get("is_stream", False)
422+
is_v2 = response.request.extensions.get("is_v2", False)
411423

412424
output: typing.Iterator[bytes]
413425

@@ -419,7 +431,7 @@ def _hook(response: httpx.Response) -> None:
419431
# For streaming responses, wrap the stream with a transformer
420432
if is_stream:
421433
original_stream = response.stream
422-
transformed_stream = transform_oci_stream_wrapper(original_stream, endpoint)
434+
transformed_stream = transform_oci_stream_wrapper(original_stream, endpoint, is_v2)
423435
response.stream = Streamer(transformed_stream)
424436
# Reset consumption flags
425437
if hasattr(response, "_content"):
@@ -430,7 +442,7 @@ def _hook(response: httpx.Response) -> None:
430442

431443
# Handle non-streaming responses
432444
oci_response = json.loads(response.read())
433-
cohere_response = transform_oci_response_to_cohere(endpoint, oci_response)
445+
cohere_response = transform_oci_response_to_cohere(endpoint, oci_response, is_v2)
434446
output = iter([json.dumps(cohere_response).encode("utf-8")])
435447

436448
response.stream = Streamer(output)
@@ -687,23 +699,31 @@ def transform_request_to_oci(
687699

688700

689701
def transform_oci_response_to_cohere(
690-
endpoint: str, oci_response: typing.Dict[str, typing.Any]
702+
endpoint: str, oci_response: typing.Dict[str, typing.Any], is_v2: bool = False
691703
) -> typing.Dict[str, typing.Any]:
692704
"""
693705
Transform OCI response to Cohere format.
694706
695707
Args:
696708
endpoint: Cohere endpoint name
697709
oci_response: OCI response body
710+
is_v2: Whether this is a V2 API request
698711
699712
Returns:
700713
Transformed response in Cohere format
701714
"""
702715
if endpoint == "embed":
703716
# OCI returns embeddings in "embeddings" field, may have multiple types
704717
embeddings_data = oci_response.get("embeddings", {})
705-
# For now, handle float embeddings (most common case)
706-
embeddings = embeddings_data.get("float", []) if isinstance(embeddings_data, dict) else embeddings_data
718+
719+
# V2 expects embeddings as a dict with type keys (float, int8, etc.)
720+
# V1 expects embeddings as a direct list
721+
if is_v2:
722+
# Keep the dict structure for V2
723+
embeddings = embeddings_data if isinstance(embeddings_data, dict) else {"float": embeddings_data}
724+
else:
725+
# Extract just the float embeddings for V1
726+
embeddings = embeddings_data.get("float", []) if isinstance(embeddings_data, dict) else embeddings_data
707727

708728
# Build proper meta structure
709729
meta = {
@@ -828,14 +848,15 @@ def transform_oci_response_to_cohere(
828848

829849

830850
def transform_oci_stream_wrapper(
831-
stream: typing.Iterator[bytes], endpoint: str
851+
stream: typing.Iterator[bytes], endpoint: str, is_v2: bool = False
832852
) -> typing.Iterator[bytes]:
833853
"""
834854
Wrap OCI stream and transform events to Cohere format.
835855
836856
Args:
837857
stream: Original OCI stream iterator
838858
endpoint: Cohere endpoint name
859+
is_v2: Whether this is a V2 API request
839860
840861
Yields:
841862
Bytes of transformed streaming events
@@ -855,8 +876,12 @@ def transform_oci_stream_wrapper(
855876

856877
try:
857878
oci_event = json.loads(data_str)
858-
cohere_event = transform_stream_event(endpoint, oci_event)
859-
yield json.dumps(cohere_event).encode("utf-8") + b"\n"
879+
cohere_event = transform_stream_event(endpoint, oci_event, is_v2)
880+
# V2 expects SSE format with "data: " prefix and double newline, V1 expects plain JSON
881+
if is_v2:
882+
yield b"data: " + json.dumps(cohere_event).encode("utf-8") + b"\n\n"
883+
else:
884+
yield json.dumps(cohere_event).encode("utf-8") + b"\n"
860885
except json.JSONDecodeError:
861886
continue
862887

@@ -891,26 +916,62 @@ def transform_oci_stream_response(
891916

892917

893918
def transform_stream_event(
894-
endpoint: str, oci_event: typing.Dict[str, typing.Any]
919+
endpoint: str, oci_event: typing.Dict[str, typing.Any], is_v2: bool = False
895920
) -> typing.Dict[str, typing.Any]:
896921
"""
897922
Transform individual OCI stream event to Cohere format.
898923
899924
Args:
900925
endpoint: Cohere endpoint name
901926
oci_event: OCI stream event
927+
is_v2: Whether this is a V2 API request
902928
903929
Returns:
904930
Transformed event in Cohere format
905931
"""
906932
if endpoint in ["chat_stream", "chat"]:
907-
return {
908-
"event_type": "text-generation",
909-
"text": oci_event.get("text", ""),
910-
"is_finished": oci_event.get("isFinished", False),
911-
}
933+
if is_v2:
934+
# V2 API format: OCI returns full message structure in each event
935+
# Extract text from nested structure: message.content[0].text
936+
text = ""
937+
if "message" in oci_event and "content" in oci_event["message"]:
938+
content_list = oci_event["message"]["content"]
939+
if content_list and isinstance(content_list, list) and len(content_list) > 0:
940+
first_content = content_list[0]
941+
if "text" in first_content:
942+
text = first_content["text"]
943+
944+
is_finished = "finishReason" in oci_event
945+
946+
if is_finished:
947+
# Final event - use content-end type
948+
return {
949+
"type": "content-end",
950+
"index": 0,
951+
}
952+
else:
953+
# Content delta event
954+
return {
955+
"type": "content-delta",
956+
"index": 0,
957+
"delta": {
958+
"message": {
959+
"content": {
960+
"text": text,
961+
}
962+
}
963+
},
964+
}
965+
else:
966+
# V1 API format
967+
return {
968+
"event_type": "text-generation",
969+
"text": oci_event.get("text", ""),
970+
"is_finished": oci_event.get("isFinished", False),
971+
}
912972

913973
elif endpoint in ["generate_stream", "generate"]:
974+
# Generate only supports V1
914975
return {
915976
"event_type": "text-generation",
916977
"text": oci_event.get("text", ""),

tests/test_oci_client.py

Lines changed: 75 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,10 @@ def test_embed_with_model_prefix(self):
6262
self.assertIsNotNone(response.embeddings)
6363
self.assertEqual(len(response.embeddings), 1)
6464

65-
@unittest.skip("Multiple embedding types not yet implemented for OCI")
65+
@unittest.skip(
66+
"OCI on-demand models don't support multiple embedding types in a single call. "
67+
"The embedding_types parameter in OCI accepts a single value, not a list."
68+
)
6669
def test_embed_multiple_types(self):
6770
"""Test embedding with multiple embedding types."""
6871
response = self.client.embed(
@@ -114,7 +117,10 @@ def test_chat_stream(self):
114117
text_events = [e for e in events if hasattr(e, "text") and e.text]
115118
self.assertTrue(len(text_events) > 0)
116119

117-
@unittest.skip("OCI TEXT_GENERATION models are finetune base models - not callable via on-demand inference")
120+
@unittest.skip(
121+
"OCI TEXT_GENERATION models are finetune base models, not available via on-demand inference. "
122+
"Only CHAT models (command-r, command-a) support on-demand inference on OCI."
123+
)
118124
def test_generate(self):
119125
"""Test text generation with OCI."""
120126
response = self.client.generate(
@@ -128,7 +134,10 @@ def test_generate(self):
128134
self.assertTrue(len(response.generations) > 0)
129135
self.assertIsNotNone(response.generations[0].text)
130136

131-
@unittest.skip("OCI TEXT_GENERATION models are finetune base models - not callable via on-demand inference")
137+
@unittest.skip(
138+
"OCI TEXT_GENERATION models are finetune base models, not available via on-demand inference. "
139+
"Only CHAT models (command-r, command-a) support on-demand inference on OCI."
140+
)
132141
def test_generate_stream(self):
133142
"""Test streaming text generation with OCI."""
134143
events = []
@@ -141,7 +150,10 @@ def test_generate_stream(self):
141150

142151
self.assertTrue(len(events) > 0)
143152

144-
@unittest.skip("OCI TEXT_RERANK models are base models - not callable via on-demand inference")
153+
@unittest.skip(
154+
"OCI TEXT_RERANK models are base models, not available via on-demand inference. "
155+
"These models require fine-tuning and deployment before use on OCI."
156+
)
145157
def test_rerank(self):
146158
"""Test reranking with OCI."""
147159
query = "What is the capital of France?"
@@ -185,17 +197,34 @@ def setUp(self):
185197
oci_profile=profile,
186198
)
187199

188-
@unittest.skip("Embed API is identical in V1 and V2 - use V1 client for embed")
189200
def test_embed_v2(self):
190-
"""Test embedding with v2 client (same as V1 for embed)."""
201+
"""Test embedding with v2 client."""
191202
response = self.client.embed(
192203
model="embed-english-v3.0",
193-
texts=["Hello from v2"],
204+
texts=["Hello from v2", "Second text"],
194205
input_type="search_document",
195206
)
196207

197208
self.assertIsNotNone(response)
198209
self.assertIsNotNone(response.embeddings)
210+
# V2 returns embeddings as a dict with "float" key
211+
self.assertIsNotNone(response.embeddings.float_)
212+
self.assertEqual(len(response.embeddings.float_), 2)
213+
# Verify embedding dimensions (1024 for embed-english-v3.0)
214+
self.assertEqual(len(response.embeddings.float_[0]), 1024)
215+
216+
def test_embed_with_model_prefix_v2(self):
217+
"""Test embedding with 'cohere.' model prefix on v2 client."""
218+
response = self.client.embed(
219+
model="cohere.embed-english-v3.0",
220+
texts=["Test with prefix"],
221+
input_type="search_document",
222+
)
223+
224+
self.assertIsNotNone(response)
225+
self.assertIsNotNone(response.embeddings)
226+
self.assertIsNotNone(response.embeddings.float_)
227+
self.assertEqual(len(response.embeddings.float_), 1)
199228

200229
def test_chat_v2(self):
201230
"""Test chat with v2 client."""
@@ -207,7 +236,41 @@ def test_chat_v2(self):
207236
self.assertIsNotNone(response)
208237
self.assertIsNotNone(response.message)
209238

210-
@unittest.skip("OCI TEXT_RERANK models are base models - not callable via on-demand inference")
239+
def test_chat_stream_v2(self):
240+
"""Test streaming chat with v2 client."""
241+
events = []
242+
for event in self.client.chat_stream(
243+
model="command-a-03-2025",
244+
messages=[{"role": "user", "content": "Count from 1 to 3"}],
245+
):
246+
events.append(event)
247+
248+
self.assertTrue(len(events) > 0)
249+
# Verify we received content-delta events with text
250+
content_delta_events = [e for e in events if hasattr(e, "type") and e.type == "content-delta"]
251+
self.assertTrue(len(content_delta_events) > 0)
252+
253+
# Verify we can extract text from events
254+
full_text = ""
255+
for event in events:
256+
if (
257+
hasattr(event, "delta")
258+
and event.delta
259+
and hasattr(event.delta, "message")
260+
and event.delta.message
261+
and hasattr(event.delta.message, "content")
262+
and event.delta.message.content
263+
and hasattr(event.delta.message.content, "text")
264+
):
265+
full_text += event.delta.message.content.text
266+
267+
# Should have received some text
268+
self.assertTrue(len(full_text) > 0)
269+
270+
@unittest.skip(
271+
"OCI TEXT_RERANK models are base models, not available via on-demand inference. "
272+
"These models require fine-tuning and deployment before use on OCI."
273+
)
211274
def test_rerank_v2(self):
212275
"""Test reranking with v2 client."""
213276
response = self.client.rerank(
@@ -378,7 +441,10 @@ def test_command_r_plus(self):
378441
)
379442
self.assertIsNotNone(response.text)
380443

381-
@unittest.skip("OCI TEXT_RERANK models are base models - not callable via on-demand inference")
444+
@unittest.skip(
445+
"OCI TEXT_RERANK models are base models, not available via on-demand inference. "
446+
"These models require fine-tuning and deployment before use on OCI."
447+
)
382448
def test_rerank_v3(self):
383449
"""Test rerank-english-v3.0 model."""
384450
response = self.client.rerank(

0 commit comments

Comments (0)