diff --git a/packages/nvidia_nat_rag/LICENSE.md b/packages/nvidia_nat_rag/LICENSE.md new file mode 100644 index 0000000000..260cc77d47 --- /dev/null +++ b/packages/nvidia_nat_rag/LICENSE.md @@ -0,0 +1,190 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, +and distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by +the copyright owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all +other entities that control, are controlled by, or are under common +control with that entity. For the purposes of this definition, +"control" means (i) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity +exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, +including but not limited to software source code, documentation +source, and configuration files. + +"Object" form shall mean any form resulting from mechanical +transformation or translation of a Source form, including but +not limited to compiled object code, generated documentation, +and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or +Object form, made available under the License, as indicated by a +copyright notice that is included in or attached to the work +(an example is provided in the Appendix below). 
+ +"Derivative Works" shall mean any work, whether in Source or Object +form, that is based on (or derived from) the Work and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. For the purposes +of this License, Derivative Works shall not include works that remain +separable from, or merely link (or bind by name) to the interfaces of, +the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including +the original version of the Work and any modifications or additions +to that Work or Derivative Works thereof, that is intentionally +submitted to the Licensor for inclusion in the Work by the copyright owner +or by an individual or Legal Entity authorized to submit on behalf of +the copyright owner. For the purposes of this definition, "submitted" +means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, +and issue tracking systems that are managed by, or on behalf of, the +Licensor for the purpose of discussing and improving the Work, but +excluding communication that is conspicuously marked or otherwise +designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity +on behalf of whom a Contribution has been received by Licensor and +subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the +Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of +this License, each Contributor hereby grants to You a perpetual, +worldwide, non-exclusive, no-charge, royalty-free, irrevocable +(except as stated in this section) patent license to make, have made, +use, offer to sell, sell, import, and otherwise transfer the Work, +where such license applies only to those patent claims licensable +by such Contributor that are necessarily infringed by their +Contribution(s) alone or by combination of their Contribution(s) +with the Work to which such Contribution(s) was submitted. If You +institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work +or a Contribution incorporated within the Work constitutes direct +or contributory patent infringement, then any patent licenses +granted to You under this License for that Work shall terminate +as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the +Work or Derivative Works thereof in any medium, with or without +modifications, and in Source or Object form, provided that You +meet the following conditions: + +(a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: 
within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + +You may add Your own copyright statement to Your modifications and +may provide additional or different license terms and conditions +for use, reproduction, or distribution of Your modifications, or +for any such Derivative Works as a whole, provided Your use, +reproduction, and distribution of the Work otherwise complies with +the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, +any Contribution intentionally submitted for inclusion in the Work +by You to the Licensor shall be under the terms and conditions of +this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify +the terms of any separate license agreement you may have executed +with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade +names, trademarks, service marks, or product names of the Licensor, +except as required for reasonable and customary use in describing the +origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or +agreed to in writing, Licensor provides the Work (and each +Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +implied, including, without limitation, any warranties or conditions +of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A +PARTICULAR PURPOSE. You are solely responsible for determining the +appropriateness of using or redistributing the Work and assume any +risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, +whether in tort (including negligence), contract, or otherwise, +unless required by applicable law (such as deliberate and grossly +negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, +incidental, or consequential damages of any character arising as a +result of this License or out of the use or inability to use the +Work (including but not limited to damages for loss of goodwill, +work stoppage, computer failure or malfunction, or any and all +other commercial damages or losses), even if such Contributor +has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing +the Work or Derivative Works thereof, You may choose to offer, +and charge a fee for, acceptance of support, warranty, indemnity, +or other liability obligations and/or rights consistent with this +License. However, in accepting such obligations, You may act only +on Your own behalf and on Your sole responsibility, not on behalf +of any other Contributor, and only if You agree to indemnify, +defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason +of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +Copyright 2025-2026 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/packages/nvidia_nat_rag/README.md b/packages/nvidia_nat_rag/README.md new file mode 100644 index 0000000000..dc27f7ef29 --- /dev/null +++ b/packages/nvidia_nat_rag/README.md @@ -0,0 +1,200 @@ +# NVIDIA NAT RAG Plugin + +This plugin integrates [NVIDIA RAG](https://github.com/NVIDIA-AI-Blueprints/rag) with NeMo Agent Toolkit, providing RAG query and search capabilities for your agent workflows. + +## Prerequisites + +- Python 3.11+ +- NeMo Agent Toolkit installed +- Access to NVIDIA AI endpoints (API key required) +- Milvus vector database running (default: `localhost:19530`) + +## Installation + +### 1. Install the Plugin + +From the NeMo Agent Toolkit root directory: + +```bash +# Activate your virtual environment +source .venv/bin/activate + +# Install the plugin in editable mode +uv pip install -e packages/nvidia_nat_rag +``` + +### 2. Set Environment Variables + +```bash +# Required: NVIDIA API key for embeddings, reranking, and LLM +export NVIDIA_API_KEY="your-nvidia-api-key" + +# Optional: If using custom endpoints +# export NVIDIA_BASE_URL="https://integrate.api.nvidia.com/v1" +``` + +### 3. Start Milvus (Vector Database) + +The plugin requires a Milvus instance. 
You can start one using Docker: + +```bash +# Using Milvus Lite (for development) +# The plugin will automatically use milvus-lite if installed + +# Or start a full Milvus instance with Docker +docker run -d --name milvus \ + -p 19530:19530 \ + -p 9091:9091 \ + milvusdb/milvus:latest +``` + +## Configuration + +### Sample Config File + +The plugin includes a sample configuration at: +``` +packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml +``` + +### Available Functions + +#### `nvidia_rag_query` +Queries documents using NVIDIA RAG and returns an AI-generated response. + +```yaml +functions: + rag_query: + _type: nvidia_rag_query + config_file: config.yaml # Path to nvidia_rag config + collection_names: ["my_docs"] # Milvus collection names + vdb_endpoint: "http://localhost:19530" + use_knowledge_base: true + # embedding_endpoint: "localhost:9080" # Optional: for on-prem embeddings +``` + +#### `nvidia_rag_search` +Searches for relevant document chunks without generating a response. + +```yaml +functions: + rag_search: + _type: nvidia_rag_search + config_file: config.yaml + collection_names: ["my_docs"] + vdb_endpoint: "http://localhost:19530" + reranker_top_k: 3 # Number of results after reranking + vdb_top_k: 20 # Number of results from vector search +``` + +## Usage + +### Running a RAG Workflow + +```bash +nat run \ + --config_file packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml \ + --input "What is the price of a hammer?" 
+``` + +### Example Workflow Config + +```yaml +functions: + rag_query: + _type: nvidia_rag_query + config_file: config.yaml + collection_names: ["product_catalog"] + vdb_endpoint: "http://localhost:19530" + use_knowledge_base: true + + rag_search: + _type: nvidia_rag_search + config_file: config.yaml + collection_names: ["product_catalog"] + vdb_endpoint: "http://localhost:19530" + reranker_top_k: 3 + vdb_top_k: 20 + + current_datetime: + _type: current_datetime + +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.1-70b-instruct + temperature: 0.0 + +workflow: + _type: react_agent + tool_names: + - rag_query + - rag_search + - current_datetime + llm_name: nim_llm + verbose: true +``` + +## Troubleshooting + +### Error: Function type `nvidia_rag_query` not found + +The plugin is not installed. Run: +```bash +uv pip install -e packages/nvidia_nat_rag +``` + +### Error: Token limit exceeded + +If you get a token limit error, reduce the number of results returned: +```yaml +rag_search: + _type: nvidia_rag_search + reranker_top_k: 1 # Reduce from 3 + vdb_top_k: 10 # Reduce from 20 +``` + +This often happens when documents contain large base64-encoded images (charts, figures). 
+ +### Error: Connection refused to Milvus + +Ensure Milvus is running: +```bash +# Check if Milvus is running +docker ps | grep milvus + +# Start Milvus if not running +docker start milvus +``` + +### Error: NVIDIA API key not set + +```bash +export NVIDIA_API_KEY="your-api-key" +``` + +## Directory Structure + +``` +packages/nvidia_nat_rag/ +├── LICENSE.md +├── README.md # This file +├── pyproject.toml # Package configuration +├── src/ +│ └── nat/ +│ ├── meta/ +│ │ └── pypi.md +│ └── plugins/ +│ └── rag/ +│ ├── __init__.py +│ ├── configs/ +│ │ └── config.yml # Sample config +│ ├── rag_functions.py # RAG function implementations +│ └── register.py # Plugin registration +└── vendor/ + └── nvidia_rag-2.4.0.dev0-py3-none-any.whl # Vendored dependency +``` + +## License + +Apache-2.0 diff --git a/packages/nvidia_nat_rag/pyproject.toml b/packages/nvidia_nat_rag/pyproject.toml new file mode 100644 index 0000000000..a54acba8a7 --- /dev/null +++ b/packages/nvidia_nat_rag/pyproject.toml @@ -0,0 +1,58 @@ +[build-system] +build-backend = "setuptools.build_meta" +requires = ["setuptools >= 64", "setuptools-scm>=8"] + + +[tool.setuptools.packages.find] +where = ["src"] +include = ["nat.*"] + + +[tool.setuptools_scm] +git_describe_command = "git describe --long --first-parent" +root = "../.." + + +[project] +name = "nvidia-nat-rag" +dynamic = ["version"] +dependencies = [ + # Keep package version constraints as open as possible to avoid conflicts with other packages. Always define a minimum + # version when adding a new package. If unsure, default to using `~=` instead of `==`. Does not apply to nvidia-nat packages. + # Keep sorted!!! 
+ "langgraph>=0.2", # Required for react_agent workflow + "langchain_classic", + "nvidia-nat~=1.5", + "nvidia-rag[rag]~=2.4", +] +requires-python = ">=3.11,<3.14" +description = "Subpackage for NVIDIA RAG library integration in NeMo Agent toolkit" +readme = "src/nat/meta/pypi.md" +keywords = ["ai", "rag", "agents"] +license = { text = "Apache-2.0" } +authors = [{ name = "NVIDIA Corporation" }] +maintainers = [{ name = "NVIDIA Corporation" }] +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +documentation = "https://docs.nvidia.com/nemo/agent-toolkit/latest/" +source = "https://github.com/NVIDIA/NeMo-Agent-Toolkit" + + +[tool.uv] +managed = true +config-settings = { editable_mode = "compat" } + + +[tool.uv.sources] +nvidia-nat = { workspace = true } +nvidia-rag = { path = "vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl" } + + +[project.entry-points.'nat.components'] +nat_rag = "nat.plugins.rag.register" diff --git a/packages/nvidia_nat_rag/src/nat/meta/pypi.md b/packages/nvidia_nat_rag/src/nat/meta/pypi.md new file mode 100644 index 0000000000..ab7f4b2682 --- /dev/null +++ b/packages/nvidia_nat_rag/src/nat/meta/pypi.md @@ -0,0 +1,35 @@ +# NVIDIA NeMo Agent Toolkit - RAG Plugin + +This package provides integration between NVIDIA NeMo Agent Toolkit and the NVIDIA RAG library. 
+ +## Features + +- **RAG Query**: Query documents using RAG with configurable LLM and embeddings +- **RAG Search**: Search for relevant documents in vector database collections + +## Installation + +```bash +pip install nvidia-nat-rag +``` + +## Usage + +Add the RAG tools to your NAT workflow configuration: + +```yaml +functions: + rag_query: + _type: nvidia_rag_query + collection_names: ["my_collection"] + vdb_endpoint: "http://localhost:19530" + +workflow: + _type: react_agent + tool_names: [rag_query] + llm_name: nim_llm +``` + +## Documentation + +For more information, see the [NeMo Agent Toolkit documentation](https://docs.nvidia.com/nemo/agent-toolkit/latest/). diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py new file mode 100644 index 0000000000..1312ed324b --- /dev/null +++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""NVIDIA RAG integration for NeMo Agent Toolkit.""" diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml b/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml new file mode 100644 index 0000000000..814f8b0ef9 --- /dev/null +++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml @@ -0,0 +1,45 @@ +# NVIDIA RAG Integration - Sample Configuration +# This configuration demonstrates how to use NVIDIA RAG tools with NeMo Agent Toolkit + +# RAG configuration file path (used by nvidia_rag library) +# Make sure this file exists and is properly configured for your environment +# See: https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/config.yaml + +functions: + # RAG Query Tool - Query documents and get AI-generated responses + rag_query: + _type: nvidia_rag_query + config_file: config.yaml # Path to nvidia_rag config file + collection_names: ["test_library"] + vdb_endpoint: "http://localhost:19530" + use_knowledge_base: true + # embedding_endpoint: "localhost:9080" # Uncomment for on-prem embeddings + + # RAG Search Tool - Search for relevant document chunks + rag_search: + _type: nvidia_rag_search + config_file: config.yaml + collection_names: ["test_library"] + vdb_endpoint: "http://localhost:19530" + reranker_top_k: 3 # Reduced to avoid token limit issues with large documents + vdb_top_k: 20 + + # Utility tool + current_datetime: + _type: current_datetime + +llms: + nim_llm: + _type: nim + model_name: meta/llama-3.1-70b-instruct + temperature: 0.0 + +workflow: + _type: react_agent + tool_names: + - rag_query + - rag_search + - current_datetime + llm_name: nim_llm + verbose: true + parse_agent_response_max_retries: 3 diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py new file mode 100644 index 0000000000..e9109c0c83 --- /dev/null +++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py @@ -0,0 +1,213 @@ +# 
SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""NVIDIA RAG query and search functions for NeMo Agent Toolkit."""
+
+import json
+import logging
+from typing import Optional
+
+from pydantic import Field
+
+from nat.builder.builder import Builder
+from nat.builder.function_info import FunctionInfo
+from nat.cli.register_workflow import register_function
+from nat.data_models.function import FunctionBaseConfig
+
+logger = logging.getLogger(__name__)
+
+# Prefix that may precede the JSON payload of a streamed response chunk.
+_STREAM_DATA_PREFIX = "data: "
+
+
+def _extract_chunk_text(chunk: str) -> str:
+    """Return the text carried by one streamed response chunk, or "" if it carries none.
+
+    A leading ``data: `` prefix is removed and the remainder is parsed as JSON. The text is read
+    from ``choices[0]["delta"]["content"]`` and, when that is empty, from
+    ``choices[0]["message"]["content"]``. Blank chunks, payloads that are not valid JSON or not a
+    JSON object, and payloads without usable choices all yield "" so that a single malformed chunk
+    cannot abort the whole response.
+
+    Args:
+        chunk: One item produced by the response generator.
+
+    Returns:
+        The chunk's text content, or an empty string.
+    """
+    if chunk.startswith(_STREAM_DATA_PREFIX):
+        chunk = chunk[len(_STREAM_DATA_PREFIX):]
+    chunk = chunk.strip()
+    if not chunk:
+        return ""
+
+    try:
+        data = json.loads(chunk)
+    except json.JSONDecodeError:
+        # Non-JSON chunks are skipped rather than treated as errors.
+        return ""
+
+    if not isinstance(data, dict):
+        return ""
+    choices = data.get("choices") or []
+    if not choices or not isinstance(choices[0], dict):
+        return ""
+
+    first_choice = choices[0]
+    # "delta"/"message" may be missing or null; `or {}` keeps the lookups below safe.
+    delta = first_choice.get("delta") or {}
+    text = delta.get("content") if isinstance(delta, dict) else None
+    if not text:
+        message = first_choice.get("message") or {}
+        text = message.get("content", "") if isinstance(message, dict) else ""
+
+    return text if isinstance(text, str) else ""
+
+
+def _citation_description(citation) -> str:
+    """Return the text to display for one search result.
+
+    Uses ``citation.metadata.description`` (``metadata`` may be an object or a dict) and falls back
+    to ``citation.content`` when there is no description. A citation without ``metadata`` no longer
+    raises ``AttributeError``.
+    """
+    metadata = getattr(citation, "metadata", None)
+    if isinstance(metadata, dict):
+        description = metadata.get("description")
+    else:
+        # getattr on None simply yields the default, covering the "no metadata" case.
+        description = getattr(metadata, "description", None)
+
+    # NOTE(review): content may be large for non-text documents - confirm this fallback is acceptable.
+    return description or getattr(citation, "content", "") or ""
+
+
+class NvidiaRAGQueryConfig(FunctionBaseConfig, name="nvidia_rag_query"):
+    """
+    Tool that queries documents using NVIDIA RAG.
+    Sends a chat-style query to the RAG system using configured models and endpoints.
+    """
+
+    config_file: str = Field(
+        default="config.yaml",
+        description="Path to the NVIDIA RAG configuration YAML file.",
+    )
+    collection_names: list[str] = Field(
+        default_factory=list,
+        description="List of collection names to query from.",
+    )
+    # NOTE(review): vdb_endpoint is declared here but nothing in this module reads it - confirm
+    # whether it should be forwarded to the RAG client.
+    vdb_endpoint: str = Field(
+        default="http://localhost:19530",
+        description="Vector database endpoint URL.",
+    )
+    embedding_endpoint: Optional[str] = Field(
+        default=None,
+        description="Embedding endpoint URL. If None, uses cloud embeddings.",
+    )
+    use_knowledge_base: bool = Field(
+        default=True,
+        description="Whether to use the knowledge base for RAG.",
+    )
+
+
+@register_function(config_type=NvidiaRAGQueryConfig)
+async def nvidia_rag_query(config: NvidiaRAGQueryConfig, builder: Builder):
+    """Register the NVIDIA RAG query tool.
+
+    Builds a ``NvidiaRAG`` client from ``config.config_file`` and yields a ``FunctionInfo`` wrapping
+    a ``query -> str`` coroutine.
+
+    Args:
+        config: Settings for the query tool.
+        builder: Workflow builder; not used by this function.
+    """
+
+    # Imported here so that importing this module does not import nvidia_rag.
+    from nvidia_rag import NvidiaRAG
+    from nvidia_rag.utils.configuration import NvidiaRAGConfig
+
+    # Initialize the RAG client
+    rag_config = NvidiaRAGConfig.from_yaml(config.config_file)
+    rag = NvidiaRAG(config=rag_config)
+
+    async def _nvidia_rag_query(query: str) -> str:
+        """Query documents using NVIDIA RAG and return a generated response.
+
+        This tool sends a query to the RAG system which retrieves relevant documents
+        from the vector database and uses an LLM to generate a response.
+
+        Args:
+            query: The question or query to ask the RAG system.
+
+        Returns:
+            The generated response from the RAG system based on retrieved documents.
+        """
+        try:
+            response = await rag.generate(
+                messages=[{"role": "user", "content": query}],
+                use_knowledge_base=config.use_knowledge_base,
+                collection_names=config.collection_names,
+                embedding_endpoint=config.embedding_endpoint,
+            )
+
+            if response.status_code != 200:
+                return f"Error: RAG query failed with status code {response.status_code}"
+
+            # The answer arrives as a stream of chunks; keep only the non-empty text pieces.
+            pieces = []
+            async for chunk in response.generator:
+                text = _extract_chunk_text(chunk)
+                if text:
+                    pieces.append(text)
+
+            return "".join(pieces) if pieces else "No response generated."
+
+        except Exception as e:
+            # Tool boundary: report the failure as text instead of raising, but keep the traceback
+            # in the log.
+            logger.exception("RAG query failed: %s", e)
+            return f"Error querying RAG: {str(e)}"
+
+    yield FunctionInfo.from_fn(
+        _nvidia_rag_query,
+        description=_nvidia_rag_query.__doc__,
+    )
+
+
+class NvidiaRAGSearchConfig(FunctionBaseConfig, name="nvidia_rag_search"):
+    """
+    Tool that searches for relevant documents in the vector database using NVIDIA RAG.
+    """
+
+    config_file: str = Field(
+        default="config.yaml",
+        description="Path to the NVIDIA RAG configuration YAML file.",
+    )
+    collection_names: list[str] = Field(
+        default_factory=list,
+        description="List of collection names to search in.",
+    )
+    # NOTE(review): vdb_endpoint is declared here but nothing in this module reads it - confirm
+    # whether it should be forwarded to the RAG client.
+    vdb_endpoint: str = Field(
+        default="http://localhost:19530",
+        description="Vector database endpoint URL.",
+    )
+    embedding_endpoint: Optional[str] = Field(
+        default=None,
+        description="Embedding endpoint URL. If None, uses cloud embeddings.",
+    )
+    reranker_top_k: int = Field(
+        default=10,
+        description="Number of top results to return after reranking.",
+    )
+    vdb_top_k: int = Field(
+        default=100,
+        description="Number of top results to retrieve from vector database before reranking.",
+    )
+
+
+@register_function(config_type=NvidiaRAGSearchConfig)
+async def nvidia_rag_search(config: NvidiaRAGSearchConfig, builder: Builder):
+    """Register the NVIDIA RAG search tool.
+
+    Builds a ``NvidiaRAG`` client from ``config.config_file`` and yields a ``FunctionInfo`` wrapping
+    a ``query -> str`` coroutine.
+
+    Args:
+        config: Settings for the search tool.
+        builder: Workflow builder; not used by this function.
+    """
+
+    # Imported here so that importing this module does not import nvidia_rag.
+    from nvidia_rag import NvidiaRAG
+    from nvidia_rag.utils.configuration import NvidiaRAGConfig
+
+    # Initialize the RAG client
+    rag_config = NvidiaRAGConfig.from_yaml(config.config_file)
+    rag = NvidiaRAG(config=rag_config)
+
+    async def _nvidia_rag_search(query: str) -> str:
+        """Search for relevant documents in the vector database.
+
+        This tool performs a semantic search in the vector database collections
+        and returns relevant document chunks.
+
+        Args:
+            query: The search query to find relevant documents.
+
+        Returns:
+            A formatted string containing the search results with document names and content.
+        """
+        try:
+            citations = await rag.search(
+                query=query,
+                collection_names=config.collection_names,
+                reranker_top_k=config.reranker_top_k,
+                vdb_top_k=config.vdb_top_k,
+                embedding_endpoint=config.embedding_endpoint,
+            )
+
+            if not citations or not getattr(citations, "results", None):
+                return "No documents found for the given query."
+
+            # Format the results: one section per citation, separated by horizontal rules.
+            results = []
+            for idx, citation in enumerate(citations.results):
+                doc_name = getattr(citation, "document_name", f"Document {idx + 1}")
+                doc_type = getattr(citation, "document_type", "text")
+                description = _citation_description(citation)
+
+                results.append(f"**{doc_name}** (type: {doc_type}):\n{description}")
+
+            return "\n\n---\n\n".join(results)
+
+        except Exception as e:
+            # Tool boundary: report the failure as text instead of raising, but keep the traceback
+            # in the log.
+            logger.exception("RAG search failed: %s", e)
+            return f"Error searching documents: {str(e)}"
+
+    yield FunctionInfo.from_fn(
+        _nvidia_rag_search,
+        description=_nvidia_rag_search.__doc__,
+    )
diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py
new file mode 100644
index 0000000000..ccbd87e44d
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +# flake8: noqa + +# Import all functions to register them with NAT +from nat.plugins.rag import rag_functions diff --git a/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl b/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl new file mode 100644 index 0000000000..e2187d675c Binary files /dev/null and b/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl differ