Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
python-version: "3.10"

- name: Install Hatch
run: |
Expand All @@ -32,9 +32,11 @@ jobs:
strategy:
matrix:
python-version:
- "3.8.x"
- "3.9.x"
- "3.10.x"
- "3.11.x"
- "3.12.x"
- "3.13.x"
- "3.14.x"
os:
- ubuntu-24.04
# - windows-latest
Expand Down
15 changes: 10 additions & 5 deletions examples/query/indexing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import requests
from couchbase.n1ql import QueryScanConsistency
from couchbase.options import KnownConfigProfiles, QueryOptions
from haystack import Pipeline
from haystack.components.converters import TextFileToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
Expand All @@ -16,13 +17,12 @@
from haystack.utils import Secret

from couchbase_haystack import (
CouchbaseClusterOptions,
CouchbasePasswordAuthenticator,
CouchbaseQueryDocumentStore,
CouchbaseQueryOptions,
QueryVectorSearchType,
CouchbaseClusterOptions,
)
from couchbase.options import KnownConfigProfiles, QueryOptions

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -59,7 +59,9 @@ def fetch_archive_from_http(url: str, output_dir: str):

document_store = CouchbaseQueryDocumentStore(
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
authenticator=CouchbasePasswordAuthenticator(
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
),
cluster_options=CouchbaseClusterOptions(profile=KnownConfigProfiles.WanDevelopment),
bucket=bucket_name,
scope=scope_name,
Expand Down Expand Up @@ -102,6 +104,9 @@ def fetch_archive_from_http(url: str, output_dir: str):
"description": "IVF,PQ32x8",
"similarity": "L2",
}
document_store.scope.query(f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}", QueryOptions(timeout=timedelta(seconds=300))).execute()
document_store.scope.query(
f"Create Index {index_name} ON {collection_name} (embedding vector) USING GSI WITH {json.dumps(cfg)}",
QueryOptions(timeout=timedelta(seconds=300)),
).execute()

logger.info(f"Index created: {index_name}")
logger.info(f"Index created: {index_name}")
29 changes: 19 additions & 10 deletions examples/query/rag_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import os

from couchbase.options import KnownConfigProfiles
from haystack import GeneratedAnswer, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.utils.hf import HFGenerationAPIType
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret
from haystack.utils.hf import HFGenerationAPIType

from couchbase_haystack import (
CouchbaseClusterOptions,
CouchbasePasswordAuthenticator,
CouchbaseQueryDocumentStore,
CouchbaseQueryEmbeddingRetriever,
QueryVectorSearchType,
)
from couchbase.options import KnownConfigProfiles

# Load HF Token from environment variables.
HF_TOKEN = Secret.from_env_var("HF_API_TOKEN")
Expand All @@ -29,7 +31,9 @@

document_store = CouchbaseQueryDocumentStore(
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
authenticator=CouchbasePasswordAuthenticator(
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
),
cluster_options=CouchbaseClusterOptions(
profile=KnownConfigProfiles.WanDevelopment,
),
Expand All @@ -45,14 +49,16 @@
# interacting with LLMs using a custom prompt.
prompt_messages = [
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
ChatMessage.from_user("""Given these documents, answer the question.
ChatMessage.from_user(
"""Given these documents, answer the question.
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}

Question: {{question}}
Answer:""")
Answer:"""
),
]
rag_pipeline = Pipeline()
rag_pipeline.add_component(
Expand All @@ -61,10 +67,13 @@
)
rag_pipeline.add_component("retriever", CouchbaseQueryEmbeddingRetriever(document_store=document_store))
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
))
rag_pipeline.add_component(
"llm",
HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
),
)
rag_pipeline.add_component("answer_builder", AnswerBuilder())

rag_pipeline.connect("query_embedder", "retriever.query_embedding")
Expand Down
26 changes: 17 additions & 9 deletions examples/search/rag_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os

from haystack import GeneratedAnswer, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.chat_prompt_builder import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators.chat import HuggingFaceAPIChatGenerator
from haystack.utils.hf import HFGenerationAPIType
from haystack.dataclasses import ChatMessage
from haystack.utils import Secret
from haystack.utils.hf import HFGenerationAPIType

from couchbase_haystack import CouchbasePasswordAuthenticator, CouchbaseSearchDocumentStore, CouchbaseSearchEmbeddingRetriever

Expand All @@ -23,7 +24,9 @@

document_store = CouchbaseSearchDocumentStore(
cluster_connection_string=Secret.from_env_var("CONNECTION_STRING"),
authenticator=CouchbasePasswordAuthenticator(username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")),
authenticator=CouchbasePasswordAuthenticator(
username=Secret.from_env_var("USER_NAME"), password=Secret.from_env_var("PASSWORD")
),
bucket=os.getenv("BUCKET_NAME"),
scope=os.getenv("SCOPE_NAME"),
collection=os.getenv("COLLECTION_NAME"),
Expand All @@ -34,14 +37,16 @@
# interacting with LLMs using a custom prompt.
prompt_messages = [
ChatMessage.from_system("You are a helpful assistant that answers questions based on the provided documents."),
ChatMessage.from_user("""Given these documents, answer the question.
ChatMessage.from_user(
"""Given these documents, answer the question.
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}

Question: {{question}}
Answer:""")
Answer:"""
),
]
rag_pipeline = Pipeline()
rag_pipeline.add_component(
Expand All @@ -50,10 +55,13 @@
)
rag_pipeline.add_component("retriever", CouchbaseSearchEmbeddingRetriever(document_store=document_store))
rag_pipeline.add_component("prompt_builder", ChatPromptBuilder(template=prompt_messages, required_variables=["question"]))
rag_pipeline.add_component("llm", HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
))
rag_pipeline.add_component(
"llm",
HuggingFaceAPIChatGenerator(
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
api_params={"model": "mistralai/Mistral-7B-Instruct-v0.2"},
),
)
rag_pipeline.add_component("answer_builder", AnswerBuilder())

rag_pipeline.connect("query_embedder", "retriever.query_embedding")
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ exclude_lines = [
[tool.hatch.envs.lint]
detached = true
dependencies = [
"black>=23.1.0",
"black==23.11.0",
"mypy>=1.0.0",
"ruff>=0.0.243",
"ruff>=0.15.5",
]

[tool.hatch.envs.lint.scripts]
Expand Down Expand Up @@ -160,6 +160,8 @@ lint.ignore = [
"B027",
# Allow boolean positional values in function calls, like `dict.get(... True)`
"FBT003",
# Allow boolean positional values in function calls like cluster options
"FBT001",
# Ignore checks for possible passwords
"S105", "S106", "S107",
# Ignore complexity
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
@component
class CouchbaseSearchEmbeddingRetriever:
"""Retrieves documents from the CouchbaseSearchDocumentStore by embedding similarity.

Uses Search Vector Index (FTS-based) for hybrid searches combining vector, full-text, and geospatial queries.
See CouchbaseSearchDocumentStore for more information.

Expand Down Expand Up @@ -139,7 +139,7 @@ def run(
@component
class CouchbaseQueryEmbeddingRetriever:
"""Retrieves documents from the CouchbaseQueryDocumentStore using vector similarity search.

Works with both Hyperscale Vector Index and Composite Vector Index.
Supports ANN (approximate) and KNN (exact) search with various similarity metrics.
See CouchbaseQueryDocumentStore for more details.
Expand Down
11 changes: 5 additions & 6 deletions src/couchbase_haystack/document_stores/document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,18 +580,18 @@ def __get_doc_from_kv(self, response: SearchResult) -> List[Document]:

class CouchbaseQueryDocumentStore(CouchbaseDocumentStore):
"""CouchbaseQueryDocumentStore uses Couchbase Global Secondary Index (GSI) for high-performance vector search.

Supports two types of vector indexes:

- **Hyperscale Vector Indexes**: Optimized for pure vector searches, scales to billions of documents.
Best for chatbot context (RAG), reverse image search, and anomaly detection.

- **Composite Vector Indexes**: Combines vector and scalar indexing. Applies scalar filters before vector search.
Best for filtered recommendations, job searches, and supply chain management.

Search types: ANN (fast, approximate) or KNN (exact).
Similarity metrics: COSINE, DOT, L2/EUCLIDEAN, L2_SQUARED/EUCLIDEAN_SQUARED.

See [Couchbase documentation](https://docs.couchbase.com/server/current/vector-index/use-vector-indexes.html).
"""

Expand Down Expand Up @@ -802,7 +802,6 @@ def _embedding_retrieval(
""" # noqa: S608 # query_vector_str is a float array, where_clause is normalized by normalize_sql_filters

try:

query_options = self.query_options.cb_query_options()
# Execute the query
result: QueryResult = self.connection.query(
Expand Down
12 changes: 8 additions & 4 deletions src/couchbase_haystack/document_stores/search_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def inclusive_start(self) -> Optional[bool]:

@inclusive_start.setter
def inclusive_start(
self, value # type: bool
self,
value, # type: bool
) -> None:
self.set_prop("inclusive_start", value)

Expand All @@ -37,7 +38,8 @@ def inclusive_end(self) -> Optional[bool]:

@inclusive_end.setter
def inclusive_end(
self, value # type: bool
self,
value, # type: bool
) -> None:
self.set_prop("inclusive_end", value)

Expand All @@ -49,7 +51,8 @@ def inclusive_min(self) -> Optional[bool]:

@inclusive_min.setter
def inclusive_min(
self, value # type: bool
self,
value, # type: bool
) -> None:
self.set_prop("inclusive_min", value)

Expand All @@ -59,7 +62,8 @@ def inclusive_max(self) -> Optional[bool]:

@inclusive_max.setter
def inclusive_max(
self, value # type: bool
self,
value, # type: bool
) -> None:
self.set_prop("inclusive_max", value)

Expand Down
35 changes: 15 additions & 20 deletions src/couchbase_haystack/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,39 +8,38 @@

import platform
import threading
from contextlib import suppress
from importlib.metadata import version

SCARF_ENDPOINT_URL = "https://couchbase.gateway.scarf.sh/couchbase-haystack"

_telemetry_sent = False
_telemetry_sent = threading.Event()
_telemetry_lock = threading.Lock()


def _get_package_version() -> str:
"""Return the installed package version, or 'unknown' if unavailable."""
try:
from importlib.metadata import version

with suppress(Exception):
return version("couchbase-haystack")
except Exception:
return "unknown"
return "unknown"


def _send_telemetry() -> None:
"""Send a single telemetry event to Scarf."""
global _telemetry_sent

with _telemetry_lock:
if _telemetry_sent:
if _telemetry_sent.is_set():
return
_telemetry_sent = True
_telemetry_sent.set()

try:
from scarf import ScarfEventLogger
with suppress(Exception):
from scarf import ScarfEventLogger # noqa: PLC0415

logger = ScarfEventLogger(
event_logger = ScarfEventLogger(
endpoint_url=SCARF_ENDPOINT_URL,
timeout=2.0,
)

logger.log_event(
event_logger.log_event(
{
"package": "couchbase-haystack",
"version": _get_package_version(),
Expand All @@ -49,17 +48,13 @@ def _send_telemetry() -> None:
"arch": platform.machine(),
}
)
except Exception:
# Telemetry must never raise — silently ignore all errors.
pass


def send_telemetry() -> None:
"""Fire-and-forget telemetry in a background daemon thread.

Safe to call multiple times; only the first invocation actually sends.
"""
try:
with suppress(Exception):
t = threading.Thread(target=_send_telemetry, daemon=True)
t.start()
except Exception:
pass
2 changes: 1 addition & 1 deletion tests/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ def create_collection_if_not_exists(collection_manager: CollectionManager, scope


def load_json_file(file_path):
with open(file_path, 'r') as file:
with open(file_path, "r") as file:
return json.load(file)
Loading
Loading