fix: Handle empty string texts correctly in embed_stream

fede-kamel · fede-kamel · commit e0cdab3ce46e · 2026-01-25T22:05:51.000-05:00
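Replace the truthiness check on `embedding.text` with an explicit
`is not None` check in both `embed_stream` implementations. Empty strings
are falsy but valid input texts; with this change they are matched against
the batch texts and have their global index adjusted like any other text.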
diff --git a/src/cohere/base_client.py b/src/cohere/base_client.py
@@ -1229,7 +1229,8 @@ def embed_stream(
             for embedding in parser.iter_embeddings():
                 # The parser sets embedding.text correctly for multiple embedding types
                 # Adjust the global index based on text position in batch
-                if embedding.text and embedding.text in batch_texts:
+                # Use 'is not None' to handle empty strings correctly (they are falsy but valid)
+                if embedding.text is not None and embedding.text in batch_texts:
                     # Get or create the set of used indices for this embedding type
                     emb_type = embedding.embedding_type
                     if emb_type not in used_batch_indices_by_type:
diff --git a/src/cohere/v2/client.py b/src/cohere/v2/client.py
@@ -610,7 +610,8 @@ def embed_stream(
             for embedding in parser.iter_embeddings():
                 # The parser sets embedding.text correctly for multiple embedding types
                 # Adjust the global index based on text position in batch
-                if embedding.text and embedding.text in batch_texts:
+                # Use 'is not None' to handle empty strings correctly (they are falsy but valid)
+                if embedding.text is not None and embedding.text in batch_texts:
                     # Get or create the set of used indices for this embedding type
                     emb_type = embedding.embedding_type
                     if emb_type not in used_batch_indices_by_type: