Skip to content

Commit 603ef23

Browse files
authored
chore: cicd fix and code cleanup (#23)
* code cleanup v1
* code cleanup v2
* CI/CD Python version upgrade
1 parent b2ab514 commit 603ef23

11 files changed

Lines changed: 82 additions & 65 deletions

File tree

.github/workflows/ci.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Setup Python
1919
uses: actions/setup-python@v4
2020
with:
21-
python-version: "3.8"
21+
python-version: "3.10"
2222

2323
- name: Install Hatch
2424
run: |
@@ -32,9 +32,11 @@ jobs:
3232
strategy:
3333
matrix:
3434
python-version:
35-
- "3.8.x"
36-
- "3.9.x"
3735
- "3.10.x"
36+
- "3.11.x"
37+
- "3.12.x"
38+
- "3.13.x"
39+
- "3.14.x"
3840
os:
3941
- ubuntu-24.04
4042
# - windows-latest

examples/query/indexing_pipeline.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import requests
1010
from couchbase.n1ql import QueryScanConsistency
11+
from couchbase.options import KnownConfigProfiles, QueryOptions
1112
from haystack import Pipeline
1213
from haystack.components.converters import TextFileToDocument
1314
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
@@ -16,13 +17,12 @@
1617
from haystack.utils import Secret
1718

1819
from couchbase_haystack import (
20+
CouchbaseClusterOptions,
1921
CouchbasePasswordAuthenticator,
2022
CouchbaseQueryDocumentStore,
2123
CouchbaseQueryOptions,
2224
QueryVectorSearchType,
23-
CouchbaseClusterOptions,
2425
)
25-
from couchbase.options import KnownConfigProfiles, QueryOptions
2626

2727
logger = logging.getLogger(__name__)
2828

@@ -59,7 +59,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
5959

6060
document_store = CouchbaseQueryDocumentStore(
6161
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
62-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
62+
authenticator=CouchbasePasswordAuthenticator(
63+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
64+
),
6365
cluster_options=CouchbaseClusterOptions(profile=KnownConfigProfiles.WanDevelopment),
6466
bucket=bucket_name,
6567
scope=scope_name,
@@ -102,6 +104,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
102104
"description": "IVF,PQ32x8",
103105
"similarity": "L2",
104106
}
105-
document_store.scope.query(f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}", QueryOptions(timeout=timedelta(seconds=300))).execute()
107+
document_store.scope.query(
108+
f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}",
109+
QueryOptions(timeout=timedelta(seconds=300)),
110+
).execute()
106111

107-
logger.info(f"Index created: {index_name}")
112+
logger.info(f"Index created: {index_name}")

examples/query/rag_pipeline.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
import os
2+
3+
from couchbase.options import KnownConfigProfiles
24
from haystack import GeneratedAnswer, Pipeline
35
from haystack.components.builders.answer_builder import AnswerBuilder
46
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
67
from haystack.components.embedders import SentenceTransformersTextEmbedder
78
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
9+
from haystack.dataclasses import ChatMessage
910
from haystack.utils import Secret
11+
from haystack.utils.hf import HFGenerationAPIType
12+
1013
from couchbase_haystack import (
1114
CouchbaseClusterOptions,
1215
CouchbasePasswordAuthenticator,
1316
CouchbaseQueryDocumentStore,
1417
CouchbaseQueryEmbeddingRetriever,
1518
QueryVectorSearchType,
1619
)
17-
from couchbase.options import KnownConfigProfiles
1820

1921
# Load HF Token from environment variables.
2022
HF_TOKEN = Secret.from_env_var("HF_API_TOKEN")
@@ -29,7 +31,9 @@
2931

3032
document_store = CouchbaseQueryDocumentStore(
3133
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
32-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
34+
authenticator=CouchbasePasswordAuthenticator(
35+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
36+
),
3337
cluster_options=CouchbaseClusterOptions(
3438
profile=KnownConfigProfiles.WanDevelopment,
3539
),
@@ -45,14 +49,16 @@
4549
# interacting with LLMs using a custom prompt.
4650
prompt_messages = [
4751
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
48-
ChatMessage.from_user("""Given these documents, answer the question.
52+
ChatMessage.from_user(
53+
"""Given these documents, answer the question.
4954
Documents:
5055
{% for doc in documents %}
5156
{{ doc.content }}
5257
{% endfor %}
5358
5459
Question: {{question}}
55-
Answer:""")
60+
Answer:"""
61+
),
5662
]
5763
rag_pipeline = Pipeline()
5864
rag_pipeline.add_component(
@@ -61,10 +67,13 @@
6167
)
6268
rag_pipeline.add_component("retriever", CouchbaseQueryEmbeddingRetriever(document_store=document_store))
6369
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
64-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
65-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
66-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
67-
))
70+
rag_pipeline.add_component(
71+
"llm",
72+
HuggingFaceAPIChatGenerator(
73+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
74+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
75+
),
76+
)
6877
rag_pipeline.add_component("answer_builder", AnswerBuilder())
6978

7079
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

examples/search/rag_pipeline.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
23
from haystack import GeneratedAnswer, Pipeline
34
from haystack.components.builders.answer_builder import AnswerBuilder
45
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
5-
from haystack.dataclasses import ChatMessage
66
from haystack.components.embedders import SentenceTransformersTextEmbedder
77
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
8-
from haystack.utils.hf import HFGenerationAPIType
8+
from haystack.dataclasses import ChatMessage
99
from haystack.utils import Secret
10+
from haystack.utils.hf import HFGenerationAPIType
1011

1112
from couchbase_haystack import CouchbasePasswordAuthenticator, CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever
1213

@@ -23,7 +24,9 @@
2324

2425
document_store = CouchbaseSearchDocumentStore(
2526
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
26-
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
27+
authenticator=CouchbasePasswordAuthenticator(
28+
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
29+
),
2730
bucket=os.getenv("BUCKET_NAME"),
2831
scope=os.getenv("SCOPE_NAME"),
2932
collection=os.getenv("COLLECTION_NAME"),
@@ -34,14 +37,16 @@
3437
# interacting with LLMs using a custom prompt.
3538
prompt_messages = [
3639
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
37-
ChatMessage.from_user("""Given these documents, answer the question.
40+
ChatMessage.from_user(
41+
"""Given these documents, answer the question.
3842
Documents:
3943
{% for doc in documents %}
4044
{{ doc.content }}
4145
{% endfor %}
4246
4347
Question: {{question}}
44-
Answer:""")
48+
Answer:"""
49+
),
4550
]
4651
rag_pipeline = Pipeline()
4752
rag_pipeline.add_component(
@@ -50,10 +55,13 @@
5055
)
5156
rag_pipeline.add_component("retriever", CouchbaseSearchEmbeddingRetriever(document_store=document_store))
5257
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
53-
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
54-
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
55-
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
56-
))
58+
rag_pipeline.add_component(
59+
"llm",
60+
HuggingFaceAPIChatGenerator(
61+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
62+
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
63+
),
64+
)
5765
rag_pipeline.add_component("answer_builder", AnswerBuilder())
5866

5967
rag_pipeline.connect("query_embedder", "retriever.query_embedding")

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ lint.ignore = [
160160
"B027",
161161
# Allow boolean positional values in function calls, like `dict.get(... True)`
162162
"FBT003",
163+
# Allow boolean positional values in function calls like cluster options
164+
"FBT001",
163165
# Ignore checks for possible passwords
164166
"S105", "S106", "S107",
165167
# Ignore complexity

src/couchbase_haystack/components/retrievers/embedding_retriever.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
@component
1717
class CouchbaseSearchEmbeddingRetriever:
1818
"""Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.
19-
19+
2020
Uses Search Vector Index (FTS-based) for hybrid searches combining vector, full-text, and geospatial queries.
2121
See CouchbaseSearchDocumentStore for more information.
2222
@@ -139,7 +139,7 @@ def run(
139139
@component
140140
class CouchbaseQueryEmbeddingRetriever:
141141
"""Retrieves documents from the CouchbaseQueryDocumentStore using vector similarity search.
142-
142+
143143
Works with both Hyperscale Vector Index and Composite Vector Index.
144144
Supports ANN (approximate) and KNN (exact) search with various similarity metrics.
145145
See CouchbaseQueryDocumentStore for more details.

src/couchbase_haystack/document_stores/document_store.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -580,18 +580,18 @@ def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:
580580

581581
class CouchbaseQueryDocumentStore(CouchbaseDocumentStore):
582582
"""CouchbaseQueryDocumentStore uses Couchbase Global Secondary Index (GSI) for high-performance vector search.
583-
583+
584584
Supports two types of vector indexes:
585-
585+
586586
- **Hyperscale Vector Indexes**: Optimized for pure vector searches, scales to billions of documents.
587587
Best for chatbot context (RAG), reverse image search, and anomaly detection.
588-
588+
589589
- **Composite Vector Indexes**: Combines vector and scalar indexing. Applies scalar filters before vector search.
590590
Best for filtered recommendations, job searches, and supply chain management.
591-
591+
592592
Search types: ANN (fast, approximate) or KNN (exact).
593593
Similarity metrics: COSINE, DOT, L2/EUCLIDEAN, L2_SQUARED/EUCLIDEAN_SQUARED.
594-
594+
595595
See [Couchbase documentation](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html).
596596
"""
597597

@@ -802,7 +802,6 @@ def _embedding_retrieval(
802802
""" # noqa: S608 # query_vector_str is a float array, where_clause is normalized by normalize_sql_filters
803803

804804
try:
805-
806805
query_options = self.query_options.cb_query_options()
807806
# Execute the query
808807
result: QueryResult = self.connection.query(

src/couchbase_haystack/telemetry.py

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,38 @@
88

99
import platform
1010
import threading
11+
from contextlib import suppress
12+
from importlib.metadata import version
1113

1214
SCARF_ENDPOINT_URL = "https://couchbase.gateway.scarf.sh/couchbase-haystack"
1315

14-
_telemetry_sent = False
16+
_telemetry_sent = threading.Event()
1517
_telemetry_lock = threading.Lock()
1618

19+
1720
def _get_package_version() -> str:
1821
"""Return the installed package version, or 'unknown' if unavailable."""
19-
try:
20-
from importlib.metadata import version
21-
22+
with suppress(Exception):
2223
return version("couchbase-haystack")
23-
except Exception:
24-
return "unknown"
24+
return "unknown"
25+
2526

2627
def _send_telemetry() -> None:
2728
"""Send a single telemetry event to Scarf."""
28-
global _telemetry_sent
29-
3029
with _telemetry_lock:
31-
if _telemetry_sent:
30+
if _telemetry_sent.is_set():
3231
return
33-
_telemetry_sent = True
32+
_telemetry_sent.set()
3433

35-
try:
36-
from scarf import ScarfEventLogger
34+
with suppress(Exception):
35+
from scarf import ScarfEventLogger # noqa: PLC0415
3736

38-
logger = ScarfEventLogger(
37+
event_logger = ScarfEventLogger(
3938
endpoint_url=SCARF_ENDPOINT_URL,
4039
timeout=2.0,
4140
)
4241

43-
logger.log_event(
42+
event_logger.log_event(
4443
{
4544
"package": "couchbase-haystack",
4645
"version": _get_package_version(),
@@ -49,17 +48,13 @@ def _send_telemetry() -> None:
4948
"arch": platform.machine(),
5049
}
5150
)
52-
except Exception:
53-
# Telemetry must never raise — silently ignore all errors.
54-
pass
51+
5552

5653
def send_telemetry() -> None:
5754
"""Fire-and-forget telemetry in a background daemon thread.
5855
5956
Safe to call multiple times; only the first invocation actually sends.
6057
"""
61-
try:
58+
with suppress(Exception):
6259
t = threading.Thread(target=_send_telemetry, daemon=True)
6360
t.start()
64-
except Exception:
65-
pass

tests/search_document_store/test_document_store.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,6 @@ def document_store(self):
149149
cluster.close()
150150

151151
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
152-
153152
for r in received:
154153
r.score = None
155154
r.embedding = None
@@ -283,7 +282,6 @@ class TestSearchDocumentStoreUnit:
283282
@pytest.fixture
284283
def document_store(self):
285284
with patch("couchbase_haystack.document_stores.document_store.Cluster") as mock_cb_cluster:
286-
287285
cluster = mock_cb_cluster.return_value
288286
bucket = cluster.bucket.return_value
289287
scope = bucket.scope.return_value

tests/search_document_store/test_retriever.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,13 @@ def test_run(self, doc_store: MagicMock):
131131
"retriever": {"top_k": 3, "search_query": sq, "filters": {"field": "meta.color", "operator": "==", "value": "red"}},
132132
}
133133
result = rag_pipeline.run(data, include_outputs_from={"query_embedder"})
134-
doc_store._embedding_retrieval.assert_called_once_with(
135-
query_embedding=result["query_embedder"]["embedding"],
136-
top_k=3,
137-
search_query=data["retriever"]["search_query"], # type: ignore
138-
filters=data["retriever"]["filters"], # type: ignore
139-
limit=None,
140-
)
134+
doc_store._embedding_retrieval.assert_called_once()
135+
called_kwargs = doc_store._embedding_retrieval.call_args.kwargs
136+
assert called_kwargs["query_embedding"] == result["query_embedder"]["embedding"]
137+
assert called_kwargs["top_k"] == 3
138+
assert called_kwargs["search_query"].encodable == data["retriever"]["search_query"].encodable
139+
assert called_kwargs["filters"] == data["retriever"]["filters"]
140+
assert called_kwargs["limit"] is None
141141
assert result["retriever"]["documents"] == doc_store._embedding_retrieval.return_value
142142

143143
def test_run_with_limit(self, doc_store: MagicMock):

0 commit comments

Comments (0)