Skip to content

Commit 603ef23

Browse files
authored
chore: cicd fix and code cleanup (#23)
* code cleanup v1
* code cleanup v2
* CI/CD Python version upgrade
1 parent b2ab514 commit 603ef23

11 files changed

Lines changed: 82 additions & 65 deletions

File tree

.github/workflows/ci.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Setup Python
1919
uses: actions/setup-python@v4
2020
with:
21-
python-version: "3.8"
21+
python-version: "3.10"
2222

2323
- name: Install Hatch
2424
run: |
@@ -32,9 +32,11 @@ jobs:
3232
strategy:
3333
matrix:
3434
python-version:
35-
- "3.8.x"
36-
- "3.9.x"
3735
- "3.10.x"
36+
- "3.11.x"
37+
- "3.12.x"
38+
- "3.13.x"
39+
- "3.14.x"
3840
os:
3941
- ubuntu-24.04
4042
# - windows-latest

examples/query/indexing_pipeline.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import requests
1010
from couchbase.n1ql import QueryScanConsistency
11+
from couchbase.options import KnownConfigProfiles, QueryOptions
1112
from haystack import Pipeline
1213
from haystack.components.converters import TextFileToDocument
1314
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
@@ -16,13 +17,12 @@
1617
from haystack.utils import Secret
1718

1819
from couchbase_haystack import (
20+
CouchbaseClusterOptions,
1921
CouchbasePasswordAuthenticator,
2022
CouchbaseQueryDocumentStore,
2123
CouchbaseQueryOptions,
2224
QueryVectorSearchType,
23-
CouchbaseClusterOptions,
2425
)
25-
from couchbase.options import KnownConfigProfiles, QueryOptions
2626

2727
logger = logging.getLogger(__name__)
2828

@@ -59,7 +59,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
5959

6060
document_store = CouchbaseQueryDocumentStore(
6161
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
62-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
62+
authenticator=CouchbasePasswordAuthenticator(
63+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
64+
),
6365
cluster_options=CouchbaseClusterOptions(profile=KnownConfigProfiles.WanDevelopment),
6466
bucket=bucket_name,
6567
scope=scope_name,
@@ -102,6 +104,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
102104
"description": "IVF,PQ32x8",
103105
"similarity": "L2",
104106
}
105-
document_store.scope.query(f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}", QueryOptions(timeout=timedelta(seconds=300))).execute()
107+
document_store.scope.query(
108+
f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}",
109+
QueryOptions(timeout=timedelta(seconds=300)),
110+
).execute()
106111

107-
logger.info(f"Index created: {index_name}")
112+
logger.info(f"Index created: {index_name}")

examples/query/rag_pipeline.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
import os
2+
3+
from couchbase.options import KnownConfigProfiles
24
from haystack import GeneratedAnswer, Pipeline
35
from haystack.components.builders.answer_builder import AnswerBuilder
46
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
67
from haystack.components.embedders import SentenceTransformersTextEmbedder
78
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
9+
from haystack.dataclasses import ChatMessage
910
from haystack.utils import Secret
11+
from haystack.utils.hf import HFGenerationAPIType
12+
1013
from couchbase_haystack import (
1114
CouchbaseClusterOptions,
1215
CouchbasePasswordAuthenticator,
1316
CouchbaseQueryDocumentStore,
1417
CouchbaseQueryEmbeddingRetriever,
1518
QueryVectorSearchType,
1619
)
17-
from couchbase.options import KnownConfigProfiles
1820

1921
# Load HF Token from environment variables.
2022
HF_TOKEN = Secret.from_env_var("HF_API_TOKEN")
@@ -29,7 +31,9 @@
2931

3032
document_store = CouchbaseQueryDocumentStore(
3133
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
32-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
34+
authenticator=CouchbasePasswordAuthenticator(
35+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
36+
),
3337
cluster_options=CouchbaseClusterOptions(
3438
profile=KnownConfigProfiles.WanDevelopment,
3539
),
@@ -45,14 +49,16 @@
4549
# interacting with LLMs using a custom prompt.
4650
prompt_messages = [
4751
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
48-
ChatMessage.from_user("""Given these documents, answer the question.
52+
ChatMessage.from_user(
53+
"""Given these documents, answer the question.
4954
Documents:
5055
{% for doc in documents %}
5156
{{ doc.content }}
5257
{% endfor %}
5358
5459
Question: {{question}}
55-
Answer:""")
60+
Answer:"""
61+
),
5662
]
5763
rag_pipeline = Pipeline()
5864
rag_pipeline.add_component(
@@ -61,10 +67,13 @@
6167
)
6268
rag_pipeline.add_component("retriever", CouchbaseQueryEmbeddingRetriever(document_store=document_store))
6369
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
64-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
65-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
66-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
67-
))
70+
rag_pipeline.add_component(
71+
"llm",
72+
HuggingFaceAPIChatGenerator(
73+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
74+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
75+
),
76+
)
6877
rag_pipeline.add_component("answer_builder", AnswerBuilder())
6978

7079
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

examples/search/rag_pipeline.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
23
from haystack import GeneratedAnswer, Pipeline
34
from haystack.components.builders.answer_builder import AnswerBuilder
45
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
66
from haystack.components.embedders import SentenceTransformersTextEmbedder
77
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
8+
from haystack.dataclasses import ChatMessage
99
from haystack.utils import Secret
10+
from haystack.utils.hf import HFGenerationAPIType
1011

1112
from couchbase_haystack import CouchbasePasswordAuthenticator, CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever
1213

@@ -23,7 +24,9 @@
2324

2425
document_store = CouchbaseSearchDocumentStore(
2526
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
26-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
27+
authenticator=CouchbasePasswordAuthenticator(
28+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
29+
),
2730
bucket=os.getenv("BUCKET_NAME"),
2831
scope=os.getenv("SCOPE_NAME"),
2932
collection=os.getenv("COLLECTION_NAME"),
@@ -34,14 +37,16 @@
3437
# interacting with LLMs using a custom prompt.
3538
prompt_messages = [
3639
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
37-
ChatMessage.from_user("""Given these documents, answer the question.
40+
ChatMessage.from_user(
41+
"""Given these documents, answer the question.
3842
Documents:
3943
{% for doc in documents %}
4044
{{ doc.content }}
4145
{% endfor %}
4246
4347
Question: {{question}}
44-
Answer:""")
48+
Answer:"""
49+
),
4550
]
4651
rag_pipeline = Pipeline()
4752
rag_pipeline.add_component(
@@ -50,10 +55,13 @@
5055
)
5156
rag_pipeline.add_component("retriever", CouchbaseSearchEmbeddingRetriever(document_store=document_store))
5257
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
53-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
54-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
55-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
56-
))
58+
rag_pipeline.add_component(
59+
"llm",
60+
HuggingFaceAPIChatGenerator(
61+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
62+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
63+
),
64+
)
5765
rag_pipeline.add_component("answer_builder", AnswerBuilder())
5866

5967
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ lint.ignore = [
160160
"B027",
161161
# Allow boolean positional values in function calls, like `dict.get(... True)`
162162
"FBT003",
163+
# Allow boolean positional values in function calls like cluster options
164+
"FBT001",
163165
# Ignore checks for possible passwords
164166
"S105", "S106", "S107",
165167
# Ignore complexity

src/couchbase_haystack/components/retrievers/embedding_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@component
1717
class CouchbaseSearchEmbeddingRetriever:
1818
"""Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.
19-
19+
2020
Uses Search Vector Index (FTS-based) for hybrid searches combining vector, full-text, and geospatial queries.
2121
See CouchbaseSearchDocumentStore for more information.
2222
@@ -139,7 +139,7 @@ def run(
139139
@component
140140
class CouchbaseQueryEmbeddingRetriever:
141141
"""Retrieves documents from the CouchbaseQueryDocumentStore using vector similarity search.
142-
142+
143143
Works with both Hyperscale Vector Index and Composite Vector Index.
144144
Supports ANN (approximate) and KNN (exact) search with various similarity metrics.
145145
See CouchbaseQueryDocumentStore for more details.

src/couchbase_haystack/document_stores/document_store.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -580,18 +580,18 @@ def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:
580580

581581
class CouchbaseQueryDocumentStore(CouchbaseDocumentStore):
582582
"""CouchbaseQueryDocumentStore uses Couchbase Global Secondary Index (GSI) for high-performance vector search.
583-
583+
584584
Supports two types of vector indexes:
585-
585+
586586
- **Hyperscale Vector Indexes**: Optimized for pure vector searches, scales to billions of documents.
587587
Best for chatbot context (RAG), reverse image search, and anomaly detection.
588-
588+
589589
- **Composite Vector Indexes**: Combines vector and scalar indexing. Applies scalar filters before vector search.
590590
Best for filtered recommendations, job searches, and supply chain management.
591-
591+
592592
Search types: ANN (fast, approximate) or KNN (exact).
593593
Similarity metrics: COSINE, DOT, L2/EUCLIDEAN, L2_SQUARED/EUCLIDEAN_SQUARED.
594-
594+
595595
See [Couchbase documentation](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html).
596596
"""
597597

@@ -802,7 +802,6 @@ def _embedding_retrieval(
802802
""" # noqa: S608 # query_vector_str is a float array, where_clause is normalized by normalize_sql_filters
803803

804804
try:
805-
806805
query_options = self.query_options.cb_query_options()
807806
# Execute the query
808807
result: QueryResult = self.connection.query(

src/couchbase_haystack/telemetry.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,38 @@
88

99
import platform
1010
import threading
11+
from contextlib import suppress
12+
from importlib.metadata import version
1113

1214
SCARF_ENDPOINT_URL = "https://couchbase.gateway.scarf.sh/couchbase-haystack"
1315

14-
_telemetry_sent = False
16+
_telemetry_sent = threading.Event()
1517
_telemetry_lock = threading.Lock()
1618

19+
1720
def _get_package_version() -> str:
1821
"""Return the installed package version, or 'unknown' if unavailable."""
19-
try:
20-
from importlib.metadata import version
21-
22+
with suppress(Exception):
2223
return version("couchbase-haystack")
23-
except Exception:
24-
return "unknown"
24+
return "unknown"
25+
2526

2627
def _send_telemetry() -> None:
2728
"""Send a single telemetry event to Scarf."""
28-
global _telemetry_sent
29-
3029
with _telemetry_lock:
31-
if _telemetry_sent:
30+
if _telemetry_sent.is_set():
3231
return
33-
_telemetry_sent = True
32+
_telemetry_sent.set()
3433

35-
try:
36-
from scarf import ScarfEventLogger
34+
with suppress(Exception):
35+
from scarf import ScarfEventLogger # noqa: PLC0415
3736

38-
logger = ScarfEventLogger(
37+
event_logger = ScarfEventLogger(
3938
endpoint_url=SCARF_ENDPOINT_URL,
4039
timeout=2.0,
4140
)
4241

43-
logger.log_event(
42+
event_logger.log_event(
4443
{
4544
"package": "couchbase-haystack",
4645
"version": _get_package_version(),
@@ -49,17 +48,13 @@ def _send_telemetry() -> None:
4948
"arch": platform.machine(),
5049
}
5150
)
52-
except Exception:
53-
# Telemetry must never raise — silently ignore all errors.
54-
pass
51+
5552

5653
def send_telemetry() -> None:
5754
"""Fire-and-forget telemetry in a background daemon thread.
5855
5956
Safe to call multiple times; only the first invocation actually sends.
6057
"""
61-
try:
58+
with suppress(Exception):
6259
t = threading.Thread(target=_send_telemetry, daemon=True)
6360
t.start()
64-
except Exception:
65-
pass

tests/search_document_store/test_document_store.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ def document_store(self):
149149
cluster.close()
150150

151151
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
152-
153152
for r in received:
154153
r.score = None
155154
r.embedding = None
@@ -283,7 +282,6 @@ class TestSearchDocumentStoreUnit:
283282
@pytest.fixture
284283
def document_store(self):
285284
with patch("couchbase_haystack.document_stores.document_store.Cluster") as mock_cb_cluster:
286-
287285
cluster = mock_cb_cluster.return_value
288286
bucket = cluster.bucket.return_value
289287
scope = bucket.scope.return_value

tests/search_document_store/test_retriever.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,13 @@ def test_run(self, doc_store: MagicMock):
131131
"retriever": {"top_k": 3, "search_query": sq, "filters": {"field": "meta.color", "operator": "==", "value": "red"}},
132132
}
133133
result = rag_pipeline.run(data, include_outputs_from={"query_embedder"})
134-
doc_store._embedding_retrieval.assert_called_once_with(
135-
query_embedding=result["query_embedder"]["embedding"],
136-
top_k=3,
137-
search_query=data["retriever"]["search_query"], # type: ignore
138-
filters=data["retriever"]["filters"], # type: ignore
139-
limit=None,
140-
)
134+
doc_store._embedding_retrieval.assert_called_once()
135+
called_kwargs = doc_store._embedding_retrieval.call_args.kwargs
136+
assert called_kwargs["query_embedding"] == result["query_embedder"]["embedding"]
137+
assert called_kwargs["top_k"] == 3
138+
assert called_kwargs["search_query"].encodable == data["retriever"]["search_query"].encodable
139+
assert called_kwargs["filters"] == data["retriever"]["filters"]
140+
assert called_kwargs["limit"] is None
141141
assert result["retriever"]["documents"] == doc_store._embedding_retrieval.return_value
142142

143143
def test_run_with_limit(self, doc_store: MagicMock):

0 commit comments

Comments (0)