diff --git a/ci/vale/styles/config/vocabularies/nat/accept.txt b/ci/vale/styles/config/vocabularies/nat/accept.txt index 1de8c93b2d..7167b8ece8 100644 --- a/ci/vale/styles/config/vocabularies/nat/accept.txt +++ b/ci/vale/styles/config/vocabularies/nat/accept.txt @@ -53,6 +53,7 @@ Databricks Datadog [Dd]atastore DB(s?) +[Dd]eclaratively [Dd]enylist [Dd]eserialize [Dd]ev @@ -153,6 +154,7 @@ Qwen [Rr]eimplement(ing)? [Rr]einstall(s?) [Rr]eplatform(ing)? +[Rr]erank(er|ing)? [Rr]epo [Rr]eputational [Rr]etarget(ed?) diff --git a/examples/RAG/simple_rag/README.md b/examples/RAG/simple_rag/README.md index 74deff99c4..5e04cb75c6 100644 --- a/examples/RAG/simple_rag/README.md +++ b/examples/RAG/simple_rag/README.md @@ -29,25 +29,37 @@ This is a simple example RAG application to showcase how one can configure and u ## Table of Contents -- [Key Features](#key-features) -- [Quickstart: RAG with Milvus](#quickstart-rag-with-milvus) - - [Installation and Setup](#installation-and-setup) - - [Install this Workflow](#install-this-workflow) - - [Set Up Milvus](#set-up-milvus) - - [Set Up API Keys](#set-up-api-keys) - - [Bootstrap Data](#bootstrap-data) - - [Configure Your Agent](#configure-your-agent) - - [Run the Workflow](#run-the-workflow) -- [Adding Long-Term Agent Memory](#adding-long-term-agent-memory) - - [Prerequisites](#prerequisites) - - [Adding Memory to the Agent](#adding-memory-to-the-agent) -- [Adding Additional Tools](#adding-additional-tools) -- [Using Test Time Compute](#using-test-time-compute) +- [Simple RAG Example](#simple-rag-example) + - [Table of Contents](#table-of-contents) + - [Key Features](#key-features) + - [Quickstart: RAG with Milvus](#quickstart-rag-with-milvus) + - [Installation and Setup](#installation-and-setup) + - [Install this Workflow](#install-this-workflow) + - [Set Up Milvus](#set-up-milvus) + - [Set Up API Keys](#set-up-api-keys) + - [Bootstrap Data](#bootstrap-data) + - [Configure Your Agent](#configure-your-agent) + - [Run the Workflow](#run-the-workflow) + - [Adding Long-Term Agent Memory](#adding-long-term-agent-memory) + - [Prerequisites](#prerequisites) + - [Adding Memory to the Agent](#adding-memory-to-the-agent) + - [Adding Additional Tools](#adding-additional-tools) + - [Using Test Time Compute](#using-test-time-compute) + - [Advanced RAG with NVIDIA RAG Library](#advanced-rag-with-nvidia-rag-library) + - [What the Library Provides](#what-the-library-provides) + - [Prerequisites](#prerequisites-1) + - [Bootstrap Data](#bootstrap-data-1) + - [Key Capabilities](#key-capabilities) + - [Integration with NeMo Agent Toolkit Components](#integration-with-nemo-agent-toolkit-components) + - [RAG-Specific Configuration](#rag-specific-configuration) + - [Example Configuration](#example-configuration) + - [Run the Workflow](#run-the-workflow-1) ## Key Features - **Milvus Vector Database Integration:** Demonstrates the `milvus_retriever` component for storing and retrieving document embeddings from CUDA and MCP documentation. - **ReAct Agent with RAG:** Shows how a `react_agent` can use retriever tools to answer questions by searching through indexed documentation. +- **Advanced RAG Pipeline with NVIDIA RAG Library:** Showcases enhanced retrieval with semantic reranking, query rewriting, confidence filtering, and structured citations. - **Long-term Memory with Mem0:** Includes integration with Mem0 platform for persistent memory, allowing the agent to remember user preferences across sessions. 
- **Multi-Collection Retrieval:** Demonstrates multiple retriever tools (`cuda_retriever_tool` and `mcp_retriever_tool`) for searching different knowledge bases. - **Additional Tool Integration:** Shows how to extend the RAG system with complementary tools like `tavily_internet_search` and `code_generation` for comprehensive question answering. @@ -356,3 +368,172 @@ The final workflow result should look similar to the following: ```console ['CUDA and MCP are two distinct technologies with different purposes and cannot be directly compared. CUDA is a parallel computing platform and programming model, primarily used for compute-intensive tasks such as scientific simulations, data analytics, and machine learning, whereas MCP is an open protocol designed for providing context to Large Language Models (LLMs), particularly for natural language processing and other AI-related tasks. While they serve different purposes, CUDA and MCP share a common goal of enabling developers to create powerful and efficient applications. They are complementary technologies that can be utilized together in certain applications to achieve innovative outcomes, although their differences in design and functionality set them apart. In essence, CUDA focuses on parallel computing and is developed by NVIDIA, whereas MCP is focused on context provision for LLMs, making them unique in their respective fields but potentially synergistic in specific use cases.'] ``` + +## Advanced RAG with NVIDIA RAG Library + +The NVIDIA RAG Library (`nvidia_rag_lib`) integrates the [NVIDIA RAG Blueprint](https://github.com/NVIDIA-AI-Blueprints/rag) pipeline into NeMo Agent Toolkit. + +The library handles the complexity of multi-stage retrieval, semantic reranking, and query optimization, allowing you to focus on building your application rather than implementing RAG infrastructure. + +### What the Library Provides + +The `nvidia_rag_lib` library provides agent tools powered by the NVIDIA RAG pipeline. + +- **Multi-stage retrieval** with configurable candidate pools and reranking +- **Semantic reranking** using NeMo Retriever models +- **Query rewriting** via LLM-based query optimization +- **Confidence filtering** to ensure result quality +- **Structured citations** for source attribution +- **Multi-collection search** across multiple knowledge bases + +All of these features are managed by the library and configured declaratively in YAML, with no custom code required. + +### Prerequisites + +Install the NVIDIA RAG Library: +```bash +uv pip install -e packages/nvidia_nat_rag_lib +``` + +### Bootstrap Data + +> [!IMPORTANT] +> The NVIDIA RAG Library example uses a different embedding model (`nvidia/llama-3.2-nv-embedqa-1b-v2`) than the basic quickstart. 
If you have an existing `cuda_docs` collection from the quickstart, drop and re-ingest with the correct embedding model: + +```bash +python scripts/langchain_web_ingest.py \ + -n cuda_docs \ + -e nvidia/llama-3.2-nv-embedqa-1b-v2 \ + --drop_collection +``` + +### Key Capabilities + +The `nvidia_rag_lib` library orchestrates a multi-stage retrieval pipeline with the following capabilities: + +- **Two-stage retrieval:** Combines broad vector search (recall) with semantic reranking (precision) to surface the most relevant results +- **Query rewriting:** LLM reformulates ambiguous or conversational queries before searching +- **Confidence filtering:** Automatically filters out low-quality matches below a configurable threshold +- **Structured citations:** Returns document metadata (name, relevance score) for source attribution + +### Integration with NeMo Agent Toolkit Components + +The `nvidia_rag_lib` library integrates with standard NeMo Agent toolkit components. You configure `llms`, `embedders`, and `retrievers` sections as usual. The library references these components by name: + +```yaml +function_groups: + cuda_qa: + _type: nvidia_rag_lib + llm: nim_llm # References llms.nim_llm + embedder: nim_embedder # References embedders.nim_embedder + retriever: cuda_retriever # References retrievers.cuda_retriever +``` + +This means you can reuse existing NeMo Agent toolkit infrastructure definitions and swap in the RAG library without changing your LLM, embedder, or retriever configurations. + +### RAG-Specific Configuration + +The library adds configuration specific to the RAG pipeline. These fields differ from a standard NeMo Agent toolkit retriever setup: + +| Field | Purpose | +|-------|---------| +| `topic` | Description for agent tool selection | +| `collection_names` | Milvus collections to search | +| `reranker_top_k` | Number of results after reranking | +| `rag_pipeline.enable_citations` | Include document metadata in results | +| `rag_pipeline.default_confidence_threshold` | Filter low-confidence results | +| `rag_pipeline.ranking.enable_reranker` | Enable semantic reranking | +| `rag_pipeline.ranking.model_name` | Reranker model to use | +| `rag_pipeline.query_rewriter.enabled` | Enable LLM query rewriting | + +### Example Configuration + +```yaml +function_groups: + cuda_qa: + _type: nvidia_rag_lib + include: + - search + llm: nim_llm + embedder: nim_embedder + retriever: cuda_retriever + topic: NVIDIA CUDA library + collection_names: + - cuda_docs + reranker_top_k: 10 + rag_pipeline: + enable_citations: true + default_confidence_threshold: 0.25 + ranking: + enable_reranker: true + model_name: nvidia/llama-3.2-nv-rerankqa-1b-v2 + query_rewriter: + enabled: true +``` + +### Run the Workflow + +```bash +nat run --config_file examples/RAG/simple_rag/configs/rag_library_mode_config.yml \ + --input "How do I install CUDA" +``` + +The logs show the pipeline stages in action: + +```console +INFO:nvidia_rag.rag_server.main:Setting top k as: 100. +INFO:nvidia_rag.rag_server.main:Narrowing the collection from 100 results and further narrowing it to 10 with the reranker for search +INFO:nvidia_rag.rag_server.main:Setting ranker top n as: 10. 
+INFO:nvidia_rag.utils.vdb.milvus.milvus_vdb: Milvus Retrieval latency: 0.8911 seconds +INFO:nvidia_rag.rag_server.main: == Context reranker time: 5631.08 ms == +INFO:nvidia_rag.utils.common:Confidence threshold filtering: 10 -> 10 documents (threshold: 0.25) +``` + +The agent decides to search the knowledge base and retrieves grounded document excerpts: + +```console +[AGENT] +Agent input: How do I install CUDA +Agent's thoughts: +Thought: To answer the user's question about installing CUDA, I need to provide them with the correct steps and requirements. + +Action: cuda_search__search +Action Input: {'query': 'CUDA installation steps'} +``` + +The search tool returns structured citations in JSON format: + +```console +[AGENT] +Calling tools: cuda_search__search +Tool's input: {'query': 'CUDA installation steps'} +Tool's response: +{"total_results":10,"results":[{"document_id":"","content":"Note\nFor both native as well as cross development, +the toolkit must be installed using the distribution-specific installer... +Download the NVIDIA CUDA Toolkit from https://developer.nvidia.com/cuda-downloads. +Choose the platform you are using and download the NVIDIA CUDA Toolkit... +...(truncated)"},...]} +``` + +The agent synthesizes a comprehensive, grounded response with specific commands for multiple platforms: + +```console +['To install CUDA, you can follow these steps: + +1. Verify that you have a CUDA-capable GPU. +2. Download the NVIDIA CUDA Toolkit from https://developer.nvidia.com/cuda-downloads. +3. Install the NVIDIA CUDA Toolkit. The installation steps may vary depending on your operating system. +4. Test that the installed software runs correctly and communicates with the hardware. + +For example, on Ubuntu, you can install CUDA using the following commands: +# apt update +# apt install cuda-toolkit + +On Windows, you can use the network installer or full installer. + +Additionally, you can use Conda to install CUDA: +$ conda install cuda -c nvidia + +You can also use pip wheels: +$ python3 -m pip install nvidia-cuda-runtime-cu12'] +``` diff --git a/examples/RAG/simple_rag/configs/rag_library_mode_config.yml b/examples/RAG/simple_rag/configs/rag_library_mode_config.yml new file mode 100644 index 0000000000..31c536e077 --- /dev/null +++ b/examples/RAG/simple_rag/configs/rag_library_mode_config.yml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.3-70b-instruct + base_url: https://integrate.api.nvidia.com/v1 + temperature: 0 + max_tokens: 4096 + top_p: 1 + +embedders: + nim_embedder: + _type: nim + model_name: nvidia/llama-3.2-nv-embedqa-1b-v2 + base_url: https://integrate.api.nvidia.com/v1 + truncate: "END" + +retrievers: + cuda_retriever: + _type: milvus_retriever + uri: http://localhost:19530 + collection_name: cuda_docs + embedding_model: nim_embedder + top_k: 100 + +function_groups: + cuda_search: + _type: nvidia_rag_lib + include: + - search + llm: nim_llm + embedder: nim_embedder + retriever: cuda_retriever + topic: NVIDIA CUDA library + collection_names: + - cuda_docs + reranker_top_k: 10 + rag_pipeline: + enable_citations: true + default_confidence_threshold: 0.25 + ranking: + enable_reranker: true + model_name: nvidia/llama-3.2-nv-rerankqa-1b-v2 + query_rewriter: + enabled: true + +workflow: + _type: react_agent + tool_names: + - cuda_search + verbose: true + llm_name: nim_llm diff --git a/examples/deploy/docker-compose.milvus.yml b/examples/deploy/docker-compose.milvus.yml index d398e2f98f..307453dab3 100644 --- a/examples/deploy/docker-compose.milvus.yml +++ b/examples/deploy/docker-compose.milvus.yml @@ -100,4 +100,3 @@ services: networks: default: name: nvidia-rag-test - diff --git a/packages/nvidia_nat_all/pyproject.toml b/packages/nvidia_nat_all/pyproject.toml index 13f78c3dc9..cccb2b8a47 100644 --- a/packages/nvidia_nat_all/pyproject.toml +++ b/packages/nvidia_nat_all/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ "nvidia-nat-opentelemetry", "nvidia-nat-phoenix", "nvidia-nat-profiling", + "nvidia-nat-rag-lib", # nvidia-nat-ragaai cannot be part of all due to conflicts with nvidia-nat-strands # "nvidia-nat-ragaai", "nvidia-nat-redis", @@ -86,6 +87,7 @@ nvidia-nat-openpipe-art = { workspace = true } nvidia-nat-opentelemetry = { workspace = true } nvidia-nat-phoenix = { workspace = true } nvidia-nat-profiling = { workspace = true } +nvidia-nat-rag-lib = { workspace = true } nvidia-nat-ragaai = { workspace = true } nvidia-nat-redis = { workspace = true } nvidia-nat-s3 = { workspace = true } diff --git a/packages/nvidia_nat_rag_lib/LICENSE-3rd-party.txt b/packages/nvidia_nat_rag_lib/LICENSE-3rd-party.txt new file mode 120000 index 0000000000..bab0d1f8a7 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/LICENSE-3rd-party.txt @@ -0,0 +1 @@ +../../LICENSE-3rd-party.txt \ No newline at end of file diff --git a/packages/nvidia_nat_rag_lib/LICENSE.md b/packages/nvidia_nat_rag_lib/LICENSE.md new file mode 120000 index 0000000000..f0608a63ae --- /dev/null +++ b/packages/nvidia_nat_rag_lib/LICENSE.md @@ -0,0 +1 @@ +../../LICENSE.md \ No newline at end of file diff --git a/packages/nvidia_nat_rag_lib/pyproject.toml b/packages/nvidia_nat_rag_lib/pyproject.toml new file mode 100644 index 0000000000..cc22632586 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/pyproject.toml @@ -0,0 +1,56 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64", "setuptools-scm>=8"] + + +[tool.setuptools.packages.find] +where = ["src"] +include = ["nat.*"] + + +[tool.setuptools_scm] +git_describe_command = "git describe --long --first-parent" +root = "../.." + + +[project] +name = "nvidia-nat-rag-lib" +dynamic = ["version"] +dependencies = [ + # Keep package version constraints as open as possible to avoid conflicts with other packages. Always define a minimum + # version when adding a new package. If unsure, default to using `~=` instead of `==`. 
Does not apply to nvidia-nat packages. + # Keep sorted!!! + "nvidia-nat~=1.5", + "nvidia-rag>=2.4.0", # TODO: Update version constraint when nvidia-rag is published to PyPI +] +requires-python = ">=3.11,<3.14" +description = "Subpackage for NVIDIA RAG library in NeMo Agent toolkit" +readme = "src/nat/meta/pypi.md" +keywords = ["ai", "rag", "agents", "retrieval"] +license = { text = "Apache-2.0" } +authors = [{ name = "NVIDIA Corporation" }] +maintainers = [{ name = "NVIDIA Corporation" }] +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +documentation = "https://docs.nvidia.com/nemo/agent-toolkit/latest/" +source = "https://github.com/NVIDIA/NeMo-Agent-Toolkit" + + +[tool.uv] +managed = true +config-settings = { editable_mode = "compat" } + + +[tool.uv.sources] +nvidia-nat = { workspace = true } +nvidia-rag = { path = "vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl" } # TODO: Remove when nvidia-rag>=2.4.0 is on PyPI + + +[project.entry-points.'nat.components'] +nat_rag_lib = "nat.plugins.rag_lib.register" diff --git a/packages/nvidia_nat_rag_lib/src/nat/meta/pypi.md b/packages/nvidia_nat_rag_lib/src/nat/meta/pypi.md new file mode 100644 index 0000000000..79e9497893 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/meta/pypi.md @@ -0,0 +1,35 @@ + + +![NVIDIA NeMo Agent Toolkit](https://media.githubusercontent.com/media/NVIDIA/NeMo-Agent-Toolkit/refs/heads/main/docs/source/_static/banner.png "NeMo Agent toolkit banner image") + +# NVIDIA NeMo Agent Toolkit RAG Library Subpackage + +Subpackage for NVIDIA RAG library integration in NeMo Agent toolkit. + +This package provides integration with the NVIDIA RAG Blueprint library, allowing NeMo Agent toolkit workflows to use retrieval-augmented generation capabilities with flexible configuration. + +## Features + +- RAG generation and semantic search over vector stores +- Query rewriting and query decomposition for improved retrieval +- Reranking for higher quality results +- Filter expression generation for metadata filtering +- Multimodal support with VLM inference +- Citation generation and guardrails + +For more information about the NVIDIA NeMo Agent toolkit, please visit the [NeMo Agent toolkit GitHub Repo](https://github.com/NVIDIA/NeMo-Agent-Toolkit). diff --git a/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/__init__.py b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/__init__.py new file mode 100644 index 0000000000..3bcc1c39bb --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
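The `nat.components` entry point declared in the `pyproject.toml` above is what makes the plugin discoverable: loading it imports `nat.plugins.rag_lib.register`, which in turn imports `client` (the next file in this diff), whose `@register_function_group` decorator registers the config type. A minimal sketch of the discovery side, using only the standard library — the group name and module path come from this diff, while the toolkit's actual plugin loader may differ in detail:

```python
# Sketch: discover and load plugins registered under the "nat.components"
# entry-point group, as declared in the pyproject.toml above.
from importlib.metadata import entry_points

for ep in entry_points(group="nat.components"):
    # For nat_rag_lib, ep.value is "nat.plugins.rag_lib.register"; loading it
    # imports client.py as a side effect, which registers the function group.
    module = ep.load()
    print(f"registered: {ep.name} -> {ep.value}")
```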
diff --git a/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/client.py b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/client.py new file mode 100644 index 0000000000..f5f7796e95 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/client.py @@ -0,0 +1,218 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from collections.abc import AsyncGenerator + +from pydantic import Field + +from nat.builder.builder import Builder +from nat.builder.function import FunctionGroup +from nat.cli.register_workflow import register_function_group +from nat.data_models.component_ref import EmbedderRef +from nat.data_models.component_ref import LLMRef +from nat.data_models.component_ref import RetrieverRef +from nat.data_models.function import FunctionGroupBaseConfig +from nat.plugins.rag_lib.config import RAGPipelineConfig +from nat.plugins.rag_lib.models import RAGSearchResult + +logger: logging.Logger = logging.getLogger(__name__) + + +class NvidiaRAGLibConfig(FunctionGroupBaseConfig, name="nvidia_rag_lib"): + """Configuration for NVIDIA RAG Library. + + Exposes search and generate tools that share a single RAG client. 
+ """ + llm: LLMRef = Field(description="LLM for response generation and query rewriting.") + embedder: EmbedderRef = Field(description="Embedder for query and document vectorization.") + retriever: RetrieverRef = Field(description="Vector store retriever for document search.") + rag_pipeline: RAGPipelineConfig = Field(default_factory=RAGPipelineConfig, + description="Advanced RAG pipeline settings.") + topic: str | None = Field(default=None, description="Topic for tool descriptions.") + collection_names: list[str] = Field(min_length=1, description="Collections to query.") + reranker_top_k: int = Field(default=10, ge=1, description="Number of results after reranking.") + + +@register_function_group(config_type=NvidiaRAGLibConfig) +async def nvidia_rag_lib(config: NvidiaRAGLibConfig, builder: Builder) -> AsyncGenerator[FunctionGroup, None]: + """NVIDIA RAG Library - exposes search and generate tools.""" + from pydantic import SecretStr + + from nat.data_models.finetuning import OpenAIMessage + from nat.embedder.nim_embedder import NIMEmbedderModelConfig + from nat.llm.nim_llm import NIMModelConfig + from nat.plugins.rag_lib.models import RAGGenerateResult + from nat.retriever.milvus.register import MilvusRetrieverConfig + from nat.retriever.nemo_retriever.register import NemoRetrieverConfig + try: + from nvidia_rag import NvidiaRAG + from nvidia_rag.rag_server.response_generator import ChainResponse + from nvidia_rag.rag_server.response_generator import Citations + from nvidia_rag.utils.configuration import FilterExpressionGeneratorConfig + from nvidia_rag.utils.configuration import NvidiaRAGConfig + from nvidia_rag.utils.configuration import QueryDecompositionConfig + from nvidia_rag.utils.configuration import QueryRewriterConfig + from nvidia_rag.utils.configuration import ReflectionConfig + from nvidia_rag.utils.configuration import VLMConfig + except ImportError as e: + raise ImportError("nvidia-rag package is not installed.") from e + + pipeline: RAGPipelineConfig = config.rag_pipeline + + rag_config: NvidiaRAGConfig = NvidiaRAGConfig( + ranking=pipeline.ranking, + retriever=pipeline.search_settings, + vlm=pipeline.vlm or VLMConfig(), + query_rewriter=pipeline.query_rewriter or QueryRewriterConfig(), + filter_expression_generator=pipeline.filter_generator or FilterExpressionGeneratorConfig(), + query_decomposition=pipeline.query_decomposition or QueryDecompositionConfig(), + reflection=pipeline.reflection or ReflectionConfig(), + enable_citations=pipeline.enable_citations, + enable_guardrails=pipeline.enable_guardrails, + enable_vlm_inference=pipeline.enable_vlm_inference, + vlm_to_llm_fallback=pipeline.vlm_to_llm_fallback, + default_confidence_threshold=pipeline.default_confidence_threshold, + ) + + # resolve LLM config + nim_llm_config = builder.get_llm_config(config.llm) + if not isinstance(nim_llm_config, NIMModelConfig): + raise ValueError(f"Unsupported LLM config type: {type(config.llm)}. 
Expected NIMModelConfig.") + + base_dict = nim_llm_config.model_dump(include={"base_url", "model_name", "api_key"}, exclude_none=True) + if "base_url" not in base_dict: + raise ValueError("base_url is required for LLM config specified in NVIDIA RAG Config.") + base_dict["server_url"] = base_dict.pop("base_url") + + rag_config.llm.parameters = rag_config.llm.parameters.model_copy( + update=nim_llm_config.model_dump(include={"temperature", "top_p", "max_tokens"}, exclude_none=True)) + + rag_config.llm = rag_config.llm.model_copy(update=base_dict) + rag_config.reflection = rag_config.reflection.model_copy(update=base_dict) + rag_config.filter_expression_generator = rag_config.filter_expression_generator.model_copy(update=base_dict) + + # resolve embedder config + nim_embedder_config = builder.get_embedder_config(config.embedder) + if not isinstance(nim_embedder_config, NIMEmbedderModelConfig): + raise ValueError(f"Unsupported embedder config type: {type(config.embedder)}. Expected NIMEmbedderModelConfig.") + base_dict = nim_embedder_config.model_dump(include={"base_url", "model_name", "api_key", "dimensions"}, + exclude_none=True) + if "base_url" not in base_dict: + raise ValueError("base_url is required for embedder config specified in NVIDIA RAG Config.") + base_dict["server_url"] = base_dict.pop("base_url") + rag_config.embeddings = rag_config.embeddings.model_copy(update=base_dict) + + # resolve retriever config + retriever_config = await builder.get_retriever_config(config.retriever) + match retriever_config: + case MilvusRetrieverConfig(): + rag_config.vector_store.url = str(retriever_config.uri) + if retriever_config.collection_name: + rag_config.vector_store.default_collection_name = retriever_config.collection_name + if retriever_config.connection_args: + if "user" in retriever_config.connection_args: + rag_config.vector_store.username = retriever_config.connection_args["user"] + if "password" in retriever_config.connection_args: + rag_config.vector_store.password = SecretStr(retriever_config.connection_args["password"]) + if retriever_config.top_k: + rag_config.retriever.top_k = retriever_config.top_k + case NemoRetrieverConfig(): + rag_config.vector_store.url = str(retriever_config.uri) + if retriever_config.collection_name: + rag_config.vector_store.default_collection_name = retriever_config.collection_name + if retriever_config.nvidia_api_key: + rag_config.vector_store.api_key = retriever_config.nvidia_api_key + if retriever_config.top_k: + rag_config.retriever.top_k = retriever_config.top_k + case _: + raise ValueError(f"Unsupported retriever config type: {type(retriever_config)}") + + rag_client: NvidiaRAG = NvidiaRAG(config=rag_config) + logger.info("NVIDIA RAG client initialized") + + topic_str: str = f" about {config.topic}" if config.topic else "" + + async def search(query: str) -> RAGSearchResult: + """Search for relevant documents.""" + try: + citations: Citations = await rag_client.search( + query=query, + collection_names=config.collection_names, + reranker_top_k=config.reranker_top_k, + ) + return RAGSearchResult(citations=citations) + except Exception: + logger.exception("RAG search failed") + raise + + # Server-Sent Events (SSE) format prefix for parsing streaming response chunks + DATA_PREFIX = "data: " + DATA_PREFIX_WIDTH = len(DATA_PREFIX) + + async def generate(query: str) -> RAGGenerateResult: + """Generate an answer using the knowledge base.""" + chunks: list[str] = [] + final_citations: Citations | None = None + try: + stream = await rag_client.generate( + 
messages=[OpenAIMessage(role="user", content=query).model_dump()], + collection_names=config.collection_names, + reranker_top_k=config.reranker_top_k, + ) + async for raw_chunk in stream: + if raw_chunk.startswith(DATA_PREFIX): + raw_chunk = raw_chunk[DATA_PREFIX_WIDTH:].strip() + if not raw_chunk or raw_chunk == "[DONE]": + continue + try: + parsed: ChainResponse = ChainResponse.model_validate_json(raw_chunk) + if parsed.choices: + choice = parsed.choices[0] + if choice.delta and choice.delta.content: + content = choice.delta.content + if isinstance(content, str): + chunks.append(content) + if parsed.citations and parsed.citations.results: + final_citations = parsed.citations + except (ValueError, TypeError, KeyError) as e: + logger.debug("Failed to parse RAG response chunk: %s - %s", type(e).__name__, e) + continue + + answer: str = "".join(chunks) if chunks else "No response generated." + return RAGGenerateResult(answer=answer, citations=final_citations) + + except Exception: + logger.exception("RAG generate failed") + raise + + group = FunctionGroup(config=config) + + group.add_function( + "search", + search, + description=( + f"Retrieve grounded excerpts{topic_str}. " + "Returns document chunks from indexed sources - use this to ground your response in cited source material " + "rather than general knowledge."), + ) + group.add_function( + "generate", + generate, + description=(f"Generate a grounded, cited answer{topic_str}. " + "Synthesizes an answer from retrieved documents, ensuring the response is grounded in cited " + "source material rather than general knowledge."), + ) + yield group diff --git a/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/config.py b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/config.py new file mode 100644 index 0000000000..3a0b6e8f83 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/config.py @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration models and type aliases for NVIDIA RAG integration.""" + +from nvidia_rag.utils.configuration import FilterExpressionGeneratorConfig as NvidiaRAGFilterGeneratorConfig +from nvidia_rag.utils.configuration import QueryDecompositionConfig as NvidiaRAGQueryDecompositionConfig +from nvidia_rag.utils.configuration import QueryRewriterConfig as NvidiaRAGQueryRewriterConfig +from nvidia_rag.utils.configuration import RankingConfig as NvidiaRAGRankingConfig +from nvidia_rag.utils.configuration import ReflectionConfig as NvidiaRAGReflectionConfig +from nvidia_rag.utils.configuration import RetrieverConfig as NvidiaRAGRetrieverConfig +from nvidia_rag.utils.configuration import VLMConfig as NvidiaRAGVLMConfig +from pydantic import BaseModel +from pydantic import Field + + +class RAGPipelineConfig(BaseModel): + """Native nvidia_rag pipeline settings. 
+ + Groups all RAG-specific settings that control search behavior, + query preprocessing, and response quality. + """ + + # Search behavior + search_settings: NvidiaRAGRetrieverConfig = Field(default_factory=lambda: NvidiaRAGRetrieverConfig()) + ranking: NvidiaRAGRankingConfig = Field(default_factory=lambda: NvidiaRAGRankingConfig()) + + # Query preprocessing (optional) + query_rewriter: NvidiaRAGQueryRewriterConfig | None = Field( + default=None, description="Rewrites queries for improved retrieval accuracy.") + filter_generator: NvidiaRAGFilterGeneratorConfig | None = Field( + default=None, description="Generates metadata filters from natural language queries.") + query_decomposition: NvidiaRAGQueryDecompositionConfig | None = Field( + default=None, description="Decomposes complex queries into simpler sub-queries.") + + # Response quality (optional) + reflection: NvidiaRAGReflectionConfig | None = Field( + default=None, description="Enables self-reflection to improve response quality.") + + # Multimodal (optional) + vlm: NvidiaRAGVLMConfig | None = Field(default=None, + description="Vision-language model config for multimodal content.") + + # Pipeline flags + enable_citations: bool = Field(default=True, description="Include source citations in responses.") + enable_guardrails: bool = Field(default=False, description="Enable content safety guardrails.") + enable_vlm_inference: bool = Field(default=False, description="Enable vision-language model inference.") + vlm_to_llm_fallback: bool = Field(default=True, description="Fall back to LLM if VLM fails.") + default_confidence_threshold: float = Field(default=0.0, + description="Minimum confidence score to include retrieved results.") diff --git a/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/models.py b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/models.py new file mode 100644 index 0000000000..e4a485ec7b --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/models.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from nvidia_rag.rag_server.response_generator import Citations +from pydantic import BaseModel +from pydantic import ConfigDict + + +class RAGResultBase(BaseModel): + """Base model for RAG tool results.""" + model_config = ConfigDict(extra="allow") + + +class RAGSearchResult(RAGResultBase): + """RAG search result.""" + citations: Citations + + def __str__(self) -> str: + return self.citations.model_dump_json() + + +class RAGGenerateResult(RAGResultBase): + """RAG generation result.""" + answer: str + citations: Citations | None = None + + def __str__(self) -> str: + return self.model_dump_json(exclude_none=True) diff --git a/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/register.py b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/register.py new file mode 100644 index 0000000000..dc8143e078 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/src/nat/plugins/rag_lib/register.py @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa +# isort:skip_file + +# Import any providers which need to be automatically registered here + +from . import client diff --git a/packages/nvidia_nat_rag_lib/tests/test_rag_lib_function.py b/packages/nvidia_nat_rag_lib/tests/test_rag_lib_function.py new file mode 100644 index 0000000000..4933d290aa --- /dev/null +++ b/packages/nvidia_nat_rag_lib/tests/test_rag_lib_function.py @@ -0,0 +1,299 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for NVIDIA RAG library integration.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock +from unittest.mock import MagicMock + +import pytest +from pydantic import HttpUrl + +from nat.data_models.component_ref import EmbedderRef +from nat.data_models.component_ref import LLMRef +from nat.data_models.component_ref import RetrieverRef +from nat.embedder.nim_embedder import NIMEmbedderModelConfig +from nat.llm.nim_llm import NIMModelConfig +from nat.retriever.milvus.register import MilvusRetrieverConfig + +# NOTE: First nvidia_rag import takes ~20s due to module-level initialization. 
+ +# ============================================================================= +# Fixtures +# ============================================================================= + +LLM_CONFIGS: dict[str, NIMModelConfig] = { + "nim_llm_llama8b": + NIMModelConfig( + model_name="meta/llama-3.1-8b-instruct", + base_url="https://integrate.api.nvidia.com/v1", + temperature=0.2, + top_p=0.95, + max_tokens=4096, + ), + "nim_llm_llama70b": + NIMModelConfig( + model_name="meta/llama-3.1-70b-instruct", + base_url="https://integrate.api.nvidia.com/v1", + temperature=0.1, + top_p=0.9, + max_tokens=4096, + ), +} + +EMBEDDER_CONFIGS: dict[str, NIMEmbedderModelConfig] = { + # nvidia/llama-3.2-nv-embedqa-1b-v2: supports dimensions parameter + "nim_embedder": + NIMEmbedderModelConfig( + model_name="nvidia/llama-3.2-nv-embedqa-1b-v2", + base_url="https://integrate.api.nvidia.com/v1", + ), # nvidia/nv-embedqa-e5-v5: REJECTS dimensions param + "nim_embedder_e5": + NIMEmbedderModelConfig( + model_name="nvidia/nv-embedqa-e5-v5", + base_url="https://integrate.api.nvidia.com/v1", + ), +} + +RETRIEVER_CONFIGS: dict[str, MilvusRetrieverConfig] = { + "milvus_retriever": + MilvusRetrieverConfig( + uri=HttpUrl("http://localhost:19530"), + collection_name="test_collection", + embedding_model="nim_embedder", + ), +} + + +@pytest.fixture(name="mock_builder") +def fixture_mock_builder() -> MagicMock: + """Create mock NAT builder with component resolution.""" + builder: MagicMock = MagicMock() + + def get_llm_config(ref: LLMRef) -> NIMModelConfig: + return LLM_CONFIGS[str(ref)] + + builder.get_llm_config = MagicMock(side_effect=get_llm_config) + + def get_embedder_config(ref: EmbedderRef) -> NIMEmbedderModelConfig: + return EMBEDDER_CONFIGS[str(ref)] + + builder.get_embedder_config = MagicMock(side_effect=get_embedder_config) + + async def get_retriever_config(ref: RetrieverRef) -> MilvusRetrieverConfig: + return RETRIEVER_CONFIGS[str(ref)] + + builder.get_retriever_config = AsyncMock(side_effect=get_retriever_config) + + return builder + + +# ============================================================================= +# NvidiaRAG Functional Tests +# ============================================================================= + + +class TestNvidiaRAGMethods: + """Test NvidiaRAG class can be imported and has expected methods.""" + + def test_import_and_instantiate_nvidia_rag(self) -> None: + """Verify nvidia_rag can be imported and instantiated.""" + from nvidia_rag import NvidiaRAG + + rag = NvidiaRAG() + assert rag is not None + assert isinstance(rag, NvidiaRAG) + + def test_generate_method_exists(self) -> None: + """NvidiaRAG should have a generate method.""" + from nvidia_rag import NvidiaRAG + + assert hasattr(NvidiaRAG, "generate") + assert callable(NvidiaRAG.generate) + + def test_search_method_exists(self) -> None: + """NvidiaRAG should have a search method.""" + from nvidia_rag import NvidiaRAG + + assert hasattr(NvidiaRAG, "search") + assert callable(NvidiaRAG.search) + + def test_health_method_exists(self) -> None: + """NvidiaRAG should have a health method.""" + from nvidia_rag import NvidiaRAG + + assert hasattr(NvidiaRAG, "health") + assert callable(NvidiaRAG.health) + + +# ============================================================================= +# Integration Tests +# ============================================================================= + + +@pytest.mark.integration +class TestNvidiaRAGIntegration: + """Integration tests for NvidiaRAG with live services.""" + + 
@pytest.fixture(name="create_collection") + def fixture_create_collection(self): + """Factory to create Milvus collections with specific embedding models.""" + from langchain_core.documents import Document + from langchain_milvus import Milvus + from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings + from pymilvus import MilvusClient + + created: list[str] = [] + + def _create(embedder_ref: str) -> str: + import re + + model_name = EMBEDDER_CONFIGS[embedder_ref].model_name + sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", model_name) + collection_name = f"test_{sanitized}" + client = MilvusClient(uri="http://localhost:19530") + if client.has_collection(collection_name): + client.drop_collection(collection_name) + + embeddings = NVIDIAEmbeddings(model=model_name) + Milvus.from_documents( + documents=[Document(page_content="Test document", metadata={"source": "test"})], + embedding=embeddings, + collection_name=collection_name, + connection_args={"uri": "http://localhost:19530"}, + ) + created.append(collection_name) + return collection_name + + yield _create + + client = MilvusClient(uri="http://localhost:19530") + for name in created: + if client.has_collection(name): + client.drop_collection(name) + + @pytest.mark.parametrize("llm_ref", list(LLM_CONFIGS.keys())) + @pytest.mark.parametrize( + "embedder_ref", + [ + "nim_embedder", + pytest.param( + "nim_embedder_e5", + marks=pytest.mark.xfail(reason="nvidia_rag passes dimensions param which nv-embedqa-e5-v5 rejects")), + ]) + @pytest.mark.parametrize("retriever_ref", list(RETRIEVER_CONFIGS.keys())) + async def test_search( + self, + mock_builder: MagicMock, + create_collection, + llm_ref: str, + embedder_ref: str, + retriever_ref: str, + ) -> None: + """Test NvidiaRAG search() with different component configs.""" + from nvidia_rag import NvidiaRAG + from nvidia_rag.utils.configuration import NvidiaRAGConfig + + collection_name = create_collection(embedder_ref) + + llm_config = LLM_CONFIGS[llm_ref] + embedder_config = EMBEDDER_CONFIGS[embedder_ref] + + rag_config = NvidiaRAGConfig() + rag_config.llm.model_name = llm_config.model_name + rag_config.llm.server_url = llm_config.base_url + rag_config.embeddings.model_name = embedder_config.model_name + rag_config.embeddings.server_url = embedder_config.base_url + rag_config.vector_store.url = "http://localhost:19530" + rag_config.vector_store.default_collection_name = collection_name + + rag = NvidiaRAG(config=rag_config) + result = await rag.search(query="test query") + + assert result is not None + + @pytest.mark.parametrize("llm_ref", list(LLM_CONFIGS.keys())) + @pytest.mark.parametrize( + "embedder_ref", + [ + "nim_embedder", + pytest.param( + "nim_embedder_e5", + marks=pytest.mark.xfail(reason="nvidia_rag passes dimensions param which nv-embedqa-e5-v5 rejects")), + ]) + @pytest.mark.parametrize("retriever_ref", list(RETRIEVER_CONFIGS.keys())) + async def test_generate( + self, + mock_builder: MagicMock, + llm_ref: str, + embedder_ref: str, + retriever_ref: str, + ) -> None: + """Test NvidiaRAG generate() with different component configs.""" + from nvidia_rag import NvidiaRAG + from nvidia_rag.utils.configuration import NvidiaRAGConfig + + llm_config = LLM_CONFIGS[llm_ref] + embedder_config = EMBEDDER_CONFIGS[embedder_ref] + + rag_config = NvidiaRAGConfig() + rag_config.llm.model_name = llm_config.model_name + rag_config.llm.server_url = llm_config.base_url + rag_config.embeddings.model_name = embedder_config.model_name + rag_config.embeddings.server_url = embedder_config.base_url + 
rag_config.vector_store.url = "http://localhost:19530" + + rag = NvidiaRAG(config=rag_config) + messages = [{"role": "user", "content": "What is RAG?"}] + result = await rag.generate(messages=messages, use_knowledge_base=False) + + assert result is not None + + @pytest.mark.parametrize("llm_ref", list(LLM_CONFIGS.keys())) + @pytest.mark.parametrize( + "embedder_ref", + [ + "nim_embedder", + pytest.param( + "nim_embedder_e5", + marks=pytest.mark.xfail(reason="nvidia_rag passes dimensions param which nv-embedqa-e5-v5 rejects")), + ]) + @pytest.mark.parametrize("retriever_ref", list(RETRIEVER_CONFIGS.keys())) + async def test_health( + self, + mock_builder: MagicMock, + llm_ref: str, + embedder_ref: str, + retriever_ref: str, + ) -> None: + """Test NvidiaRAG health() with different component configs.""" + from nvidia_rag import NvidiaRAG + from nvidia_rag.utils.configuration import NvidiaRAGConfig + + llm_config = LLM_CONFIGS[llm_ref] + embedder_config = EMBEDDER_CONFIGS[embedder_ref] + + rag_config = NvidiaRAGConfig() + rag_config.llm.model_name = llm_config.model_name + rag_config.llm.server_url = llm_config.base_url + rag_config.embeddings.model_name = embedder_config.model_name + rag_config.embeddings.server_url = embedder_config.base_url + rag_config.vector_store.url = "http://localhost:19530" + + rag = NvidiaRAG(config=rag_config) + result = await rag.health() + + assert result is not None diff --git a/packages/nvidia_nat_rag_lib/tests/test_rag_lib_models.py b/packages/nvidia_nat_rag_lib/tests/test_rag_lib_models.py new file mode 100644 index 0000000000..23c7aa6708 --- /dev/null +++ b/packages/nvidia_nat_rag_lib/tests/test_rag_lib_models.py @@ -0,0 +1,83 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json + +import pytest +from nvidia_rag.rag_server.response_generator import Citations + +from nat.plugins.rag_lib.models import RAGGenerateResult +from nat.plugins.rag_lib.models import RAGSearchResult + + +class TestRAGSearchResult: + """Tests for RAGSearchResult model.""" + + @pytest.fixture + def citations(self) -> Citations: + """Create Citations object.""" + return Citations(total_results=2, results=[]) + + def test_creation(self, citations: Citations) -> None: + """Test RAGSearchResult can be created with citations.""" + result = RAGSearchResult(citations=citations) + assert result.citations is citations + + def test_str_returns_json(self, citations: Citations) -> None: + """Test __str__ returns JSON from citations.model_dump_json().""" + result = RAGSearchResult(citations=citations) + output = str(result) + + parsed = json.loads(output) + assert parsed["total_results"] == 2 + + +class TestRAGGenerateResult: + """Tests for RAGGenerateResult model.""" + + @pytest.fixture + def citations(self) -> Citations: + """Create Citations object.""" + return Citations(total_results=1, results=[]) + + def test_creation_with_answer_only(self) -> None: + """Test RAGGenerateResult can be created with just an answer.""" + result = RAGGenerateResult(answer="This is the answer.") + assert result.answer == "This is the answer." + assert result.citations is None + + def test_creation_with_citations(self, citations: Citations) -> None: + """Test RAGGenerateResult can be created with answer and citations.""" + result = RAGGenerateResult(answer="Answer with sources.", citations=citations) + assert result.answer == "Answer with sources." + assert result.citations is citations + + def test_str_without_citations(self) -> None: + """Test __str__ excludes citations when None.""" + result = RAGGenerateResult(answer="Just an answer.") + output = str(result) + + parsed = json.loads(output) + assert parsed["answer"] == "Just an answer." + assert "citations" not in parsed + + def test_str_with_citations(self, citations: Citations) -> None: + """Test __str__ includes citations when present.""" + result = RAGGenerateResult(answer="Answer.", citations=citations) + output = str(result) + + parsed = json.loads(output) + assert parsed["answer"] == "Answer." + assert "citations" in parsed diff --git a/packages/nvidia_nat_rag_lib/tests/test_tools.py b/packages/nvidia_nat_rag_lib/tests/test_tools.py new file mode 100644 index 0000000000..e3b6cd1fbc --- /dev/null +++ b/packages/nvidia_nat_rag_lib/tests/test_tools.py @@ -0,0 +1,140 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import AsyncMock +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from nvidia_rag.rag_server.response_generator import Citations + +from nat.builder.builder import Builder +from nat.plugins.rag_lib.client import NvidiaRAGLibConfig +from nat.plugins.rag_lib.client import nvidia_rag_lib +from nat.plugins.rag_lib.models import RAGGenerateResult +from nat.plugins.rag_lib.models import RAGSearchResult + + +class TestNvidiaRAGLib: + + @pytest.fixture(name="mock_builder") + def fixture_mock_builder(self) -> MagicMock: + from pydantic import HttpUrl + + from nat.embedder.nim_embedder import NIMEmbedderModelConfig + from nat.llm.nim_llm import NIMModelConfig + from nat.retriever.milvus.register import MilvusRetrieverConfig + + builder = MagicMock(spec=Builder) + builder.get_llm_config = MagicMock(return_value=NIMModelConfig( + model_name="meta/llama-3.1-8b-instruct", + base_url="https://integrate.api.nvidia.com/v1", + )) + builder.get_embedder_config = MagicMock(return_value=NIMEmbedderModelConfig( + model_name="nvidia/llama-3.2-nv-embedqa-1b-v2", + base_url="https://integrate.api.nvidia.com/v1", + )) + builder.get_retriever_config = AsyncMock(return_value=MilvusRetrieverConfig( + uri=HttpUrl("http://localhost:19530"), + collection_name="test_collection", + embedding_model="nim_embedder", + )) + return builder + + @pytest.fixture(name="config") + def fixture_config(self) -> NvidiaRAGLibConfig: + from nat.data_models.component_ref import EmbedderRef + from nat.data_models.component_ref import LLMRef + from nat.data_models.component_ref import RetrieverRef + return NvidiaRAGLibConfig( + llm=LLMRef("nim_llm"), + embedder=EmbedderRef("nim_embedder"), + retriever=RetrieverRef("cuda_retriever"), + collection_names=["test_collection"], + ) + + @pytest.fixture(name="mock_rag_client") + def fixture_mock_rag_client(self) -> MagicMock: + client = MagicMock() + client.search = AsyncMock(return_value=Citations(total_results=3, results=[])) + return client + + async def test_search_returns_results(self, + config: NvidiaRAGLibConfig, + mock_builder: MagicMock, + mock_rag_client: MagicMock) -> None: + with patch("nvidia_rag.NvidiaRAG", return_value=mock_rag_client): + async with nvidia_rag_lib(config, mock_builder) as group: + functions = await group.get_all_functions() + search_fn = next((f for name, f in functions.items() if name.endswith("search")), None) + assert search_fn is not None + + result = await search_fn.acall_invoke(query="test query") + + assert isinstance(result, RAGSearchResult) + assert result.citations.total_results == 3 + + async def test_generate_returns_answer(self, + config: NvidiaRAGLibConfig, + mock_builder: MagicMock, + mock_rag_client: MagicMock) -> None: + + async def mock_stream(): + yield 'data: {"id": "1", "model": "m", "choices": [{"delta": {"content": "Hello"}}]}' + yield 'data: {"id": "1", "model": "m", "choices": [{"delta": {"content": " world"}}]}' + yield 'data: [DONE]' + + mock_rag_client.generate = AsyncMock(return_value=mock_stream()) + + with patch("nvidia_rag.NvidiaRAG", return_value=mock_rag_client): + async with nvidia_rag_lib(config, mock_builder) as group: + functions = await group.get_all_functions() + generate_fn = next((f for name, f in functions.items() if name.endswith("generate")), None) + assert generate_fn is not None + + result = await generate_fn.acall_invoke(query="test") + + assert isinstance(result, RAGGenerateResult) + assert result.answer == "Hello world" + + async def 
test_generate_handles_empty_stream(self, + config: NvidiaRAGLibConfig, + mock_builder: MagicMock, + mock_rag_client: MagicMock) -> None: + + async def mock_empty_stream(): + yield 'data: [DONE]' + + mock_rag_client.generate = AsyncMock(return_value=mock_empty_stream()) + + with patch("nvidia_rag.NvidiaRAG", return_value=mock_rag_client): + async with nvidia_rag_lib(config, mock_builder) as group: + functions = await group.get_all_functions() + generate_fn = next((f for name, f in functions.items() if name.endswith("generate")), None) + result = await generate_fn.acall_invoke(query="test") + + assert isinstance(result, RAGGenerateResult) + assert result.answer == "No response generated." + + async def test_group_exposes_both_tools(self, + config: NvidiaRAGLibConfig, + mock_builder: MagicMock, + mock_rag_client: MagicMock) -> None: + with patch("nvidia_rag.NvidiaRAG", return_value=mock_rag_client): + async with nvidia_rag_lib(config, mock_builder) as group: + functions = await group.get_all_functions() + function_names = list(functions.keys()) + assert any(name.endswith("search") for name in function_names) + assert any(name.endswith("generate") for name in function_names) diff --git a/packages/nvidia_nat_rag_lib/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl b/packages/nvidia_nat_rag_lib/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl new file mode 100644 index 0000000000..e2187d675c Binary files /dev/null and b/packages/nvidia_nat_rag_lib/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl differ diff --git a/pyproject.toml b/pyproject.toml index 27279fb182..5cfa47e8fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ opentelemetry = ["nvidia-nat-opentelemetry"] phoenix = ["nvidia-nat-phoenix"] pii-defense = ["presidio-analyzer", "presidio-anonymizer"] profiling = ["nvidia-nat-profiling"] # meta-package +rag-lib = ["nvidia-nat-rag-lib"] ragaai = ["nvidia-nat-ragaai"] mysql = ["nvidia-nat-mysql"] redis = ["nvidia-nat-redis"] @@ -251,6 +252,7 @@ nvidia-nat-mysql = { workspace = true } nvidia-nat-opentelemetry = { workspace = true } nvidia-nat-phoenix = { workspace = true } nvidia-nat-profiling = { workspace = true } +nvidia-nat-rag-lib = { workspace = true } nvidia-nat-ragaai = { workspace = true } nvidia-nat-redis = { workspace = true } nvidia-nat-s3 = { workspace = true } diff --git a/scripts/langchain_web_ingest.py b/scripts/langchain_web_ingest.py index 604b26099e..5942f1e55a 100644 --- a/scripts/langchain_web_ingest.py +++ b/scripts/langchain_web_ingest.py @@ -21,6 +21,7 @@ from langchain_milvus import Milvus from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings from langchain_text_splitters import RecursiveCharacterTextSplitter +from pymilvus import MilvusClient from web_utils import cache_html from web_utils import get_file_path_from_url from web_utils import scrape @@ -38,9 +39,18 @@ async def main(*, milvus_uri: str, collection_name: str, clean_cache: bool = True, + drop_collection: bool = False, embedding_model: str = "nvidia/nv-embedqa-e5-v5", base_path: str = "./.tmp/data"): + if drop_collection: + client = MilvusClient(uri=milvus_uri) + if client.has_collection(collection_name): + logger.info("Dropping existing collection: %s", collection_name) + client.drop_collection(collection_name) + else: + logger.info("Collection '%s' does not exist, nothing to drop", collection_name) + embedder = NVIDIAEmbeddings(model=embedding_model, truncate="END") # Create the Milvus vector store @@ -131,6 +141,11 @@ async def main(*, parser.add_argument("--collection_name", "-n", 
                         default=CUDA_COLLECTION_NAME,
                         help="Collection name for the data.")
     parser.add_argument("--milvus_uri", "-u", default=DEFAULT_URI, help="Milvus host URI")
     parser.add_argument("--clean_cache", default=False, help="If true, deletes local files", action="store_true")
+    parser.add_argument("--drop_collection",
+                        default=False,
+                        help="Drop existing collection before ingesting",
+                        action="store_true")
+    parser.add_argument("--embedding_model", "-e", default="nvidia/nv-embedqa-e5-v5", help="Embedding model to use")
 
     args = parser.parse_args()
     if len(args.urls) == 0:
@@ -142,4 +157,6 @@ async def main(*,
         milvus_uri=args.milvus_uri,
         collection_name=args.collection_name,
         clean_cache=args.clean_cache,
+        drop_collection=args.drop_collection,
+        embedding_model=args.embedding_model,
     ))
diff --git a/src/nat/embedder/nim_embedder.py b/src/nat/embedder/nim_embedder.py
index 85c27c0fa3..859ecd58ca 100644
--- a/src/nat/embedder/nim_embedder.py
+++ b/src/nat/embedder/nim_embedder.py
@@ -50,6 +50,7 @@ class NIMEmbedderModelConfig(EmbedderBaseConfig, RetryMixin, name="nim"):
     truncate: TruncationOption = Field(default="NONE",
                                        description=("The truncation strategy if the input on the "
                                                     "server side is too large."))
+    dimensions: int | None = Field(default=None, description="Embedding output dimensions.")
 
     model_config = ConfigDict(protected_namespaces=(), extra="allow")
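For context on the final hunk: the new optional `dimensions` field on `NIMEmbedderModelConfig` lets a configuration pin the embedder's output dimensionality, which must match the vector size of the target Milvus collection. As the integration tests above note, not every model accepts it (`nvidia/nv-embedqa-e5-v5` rejects the parameter). A hedged sketch of constructing the config with the new field — the `2048` value is illustrative only:

```python
# Sketch: the new optional "dimensions" field on NIMEmbedderModelConfig.
# The value shown is illustrative; it must match the vector size the target
# Milvus collection was created with. Omit the field to use the model default.
from nat.embedder.nim_embedder import NIMEmbedderModelConfig

embedder_config = NIMEmbedderModelConfig(
    model_name="nvidia/llama-3.2-nv-embedqa-1b-v2",
    base_url="https://integrate.api.nvidia.com/v1",
    truncate="END",
    dimensions=2048,  # illustrative; drop this line for the model default
)
print(embedder_config.model_dump(exclude_none=True))
```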