Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ed9de49
Adding RAG lib integration stubs.
ericevans-nv Oct 9, 2025
2ea545b
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 16, 2025
a76d50e
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 20, 2025
7f77bc8
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 21, 2025
fe095df
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 23, 2025
0c05f40
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 27, 2025
22c9e10
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Oct 28, 2025
ca34fda
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Dec 8, 2025
56353ee
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Dec 9, 2025
229028b
Pushing package structure for testing.
ericevans-nv Dec 12, 2025
13a2dae
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Dec 15, 2025
0146a2f
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 14, 2026
b3a6508
Add rag_lib subpackage to support NVIDIA RAG Blueprint library with p…
ericevans-nv Jan 15, 2026
1978f2f
Add rag library mode search and generate tools.
ericevans-nv Jan 20, 2026
5329557
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 21, 2026
5676a6d
Add Advanced RAG example using nvidia_rag_lib package
ericevans-nv Jan 22, 2026
c16d36f
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 22, 2026
ddf8d34
Improve nvidia_rag_lib optional dependency handling and test quality
ericevans-nv Jan 22, 2026
a6edc40
Refine README and update copyright dates
ericevans-nv Jan 22, 2026
e1e3b64
Pushing temp wheel file
ericevans-nv Jan 22, 2026
760ba4c
enh(rag): improve config interface; change import deferment
willkill07 Jan 22, 2026
5b98ec5
propagate exceptions, require collection_names, cleanup tests
ericevans-nv Jan 22, 2026
abcd328
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 22, 2026
27855e1
adding docstrings
ericevans-nv Jan 23, 2026
04db967
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 23, 2026
0bd20be
Add reranking to Vale
ericevans-nv Jan 23, 2026
16401ff
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 26, 2026
044299e
Renaming conflicting test file
ericevans-nv Jan 26, 2026
aaba429
Fixing test file
ericevans-nv Jan 26, 2026
95ff54c
Merge branch 'develop' of github.com:NVIDIA/NeMo-Agent-Toolkit into r…
ericevans-nv Jan 26, 2026
62d3001
improve doc clarity
ericevans-nv Jan 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ci/vale/styles/config/vocabularies/nat/accept.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Databricks
Datadog
[Dd]atastore
DB(s?)
[Dd]eclaratively
[Dd]enylist
[Dd]eserialize
[Dd]ev
Expand Down Expand Up @@ -153,6 +154,7 @@ Qwen
[Rr]eimplement(ing)?
[Rr]einstall(s?)
[Rr]eplatform(ing)?
[Rr]erank(er|ing)?
[Rr]epo
[Rr]eputational
[Rr]etarget(ed?)
Expand Down
209 changes: 195 additions & 14 deletions examples/RAG/simple_rag/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,37 @@ This is a simple example RAG application to showcase how one can configure and u

## Table of Contents

- [Key Features](#key-features)
- [Quickstart: RAG with Milvus](#quickstart-rag-with-milvus)
- [Installation and Setup](#installation-and-setup)
- [Install this Workflow](#install-this-workflow)
- [Set Up Milvus](#set-up-milvus)
- [Set Up API Keys](#set-up-api-keys)
- [Bootstrap Data](#bootstrap-data)
- [Configure Your Agent](#configure-your-agent)
- [Run the Workflow](#run-the-workflow)
- [Adding Long-Term Agent Memory](#adding-long-term-agent-memory)
- [Prerequisites](#prerequisites)
- [Adding Memory to the Agent](#adding-memory-to-the-agent)
- [Adding Additional Tools](#adding-additional-tools)
- [Using Test Time Compute](#using-test-time-compute)
- [Simple RAG Example](#simple-rag-example)
- [Table of Contents](#table-of-contents)
- [Key Features](#key-features)
- [Quickstart: RAG with Milvus](#quickstart-rag-with-milvus)
- [Installation and Setup](#installation-and-setup)
- [Install this Workflow](#install-this-workflow)
- [Set Up Milvus](#set-up-milvus)
- [Set Up API Keys](#set-up-api-keys)
- [Bootstrap Data](#bootstrap-data)
- [Configure Your Agent](#configure-your-agent)
- [Run the Workflow](#run-the-workflow)
- [Adding Long-Term Agent Memory](#adding-long-term-agent-memory)
- [Prerequisites](#prerequisites)
- [Adding Memory to the Agent](#adding-memory-to-the-agent)
- [Adding Additional Tools](#adding-additional-tools)
- [Using Test Time Compute](#using-test-time-compute)
- [Advanced RAG with NVIDIA RAG Library](#advanced-rag-with-nvidia-rag-library)
- [What the Library Provides](#what-the-library-provides)
- [Prerequisites](#prerequisites-1)
- [Bootstrap Data](#bootstrap-data-1)
- [Key Capabilities](#key-capabilities)
- [Integration with NeMo Agent Toolkit Components](#integration-with-nemo-agent-toolkit-components)
- [RAG-Specific Configuration](#rag-specific-configuration)
- [Example Configuration](#example-configuration)
- [Run the Workflow](#run-the-workflow-1)

## Key Features

- **Milvus Vector Database Integration:** Demonstrates the `milvus_retriever` component for storing and retrieving document embeddings from CUDA and MCP documentation.
- **ReAct Agent with RAG:** Shows how a `react_agent` can use retriever tools to answer questions by searching through indexed documentation.
- **Advanced RAG Pipeline with NVIDIA RAG Library:** Showcases enhanced retrieval with semantic reranking, query rewriting, confidence filtering, and structured citations.
- **Long-term Memory with Mem0:** Includes integration with Mem0 platform for persistent memory, allowing the agent to remember user preferences across sessions.
- **Multi-Collection Retrieval:** Demonstrates multiple retriever tools (`cuda_retriever_tool` and `mcp_retriever_tool`) for searching different knowledge bases.
- **Additional Tool Integration:** Shows how to extend the RAG system with complementary tools like `tavily_internet_search` and `code_generation` for comprehensive question answering.
Expand Down Expand Up @@ -356,3 +368,172 @@ The final workflow result should look similar to the following:
```console
['CUDA and MCP are two distinct technologies with different purposes and cannot be directly compared. CUDA is a parallel computing platform and programming model, primarily used for compute-intensive tasks such as scientific simulations, data analytics, and machine learning, whereas MCP is an open protocol designed for providing context to Large Language Models (LLMs), particularly for natural language processing and other AI-related tasks. While they serve different purposes, CUDA and MCP share a common goal of enabling developers to create powerful and efficient applications. They are complementary technologies that can be utilized together in certain applications to achieve innovative outcomes, although their differences in design and functionality set them apart. In essence, CUDA focuses on parallel computing and is developed by NVIDIA, whereas MCP is focused on context provision for LLMs, making them unique in their respective fields but potentially synergistic in specific use cases.']
```

## Advanced RAG with NVIDIA RAG Library

The NVIDIA RAG Library (`nvidia_rag_lib`) integrates the [NVIDIA RAG Blueprint](https://github.com/NVIDIA-AI-Blueprints/rag) pipeline into NeMo Agent Toolkit.

The library handles the complexity of multi-stage retrieval, semantic reranking, and query optimization, allowing you to focus on building your application rather than implementing RAG infrastructure.

### What the Library Provides

The `nvidia_rag_lib` library provides agent tools powered by the NVIDIA RAG pipeline, with the following features:

- **Multi-stage retrieval** with configurable candidate pools and reranking
- **Semantic reranking** using NeMo Retriever models
- **Query rewriting** via LLM-based query optimization
- **Confidence filtering** to ensure result quality
- **Structured citations** for source attribution
- **Multi-collection search** across multiple knowledge bases

All of these features are managed by the library and configured declaratively in YAML, with no custom code required.

### Prerequisites

Install the NVIDIA RAG Library:
```bash
uv pip install -e packages/nvidia_nat_rag_lib
```

### Bootstrap Data

> [!IMPORTANT]
> The NVIDIA RAG Library example uses a different embedding model (`nvidia/llama-3.2-nv-embedqa-1b-v2`) than the basic quickstart. If you have an existing `cuda_docs` collection from the quickstart, drop and re-ingest with the correct embedding model:

```bash
python scripts/langchain_web_ingest.py \
-n cuda_docs \
-e nvidia/llama-3.2-nv-embedqa-1b-v2 \
--drop_collection
```

### Key Capabilities

The `nvidia_rag_lib` library orchestrates a multi-stage retrieval pipeline with the following capabilities:

- **Two-stage retrieval:** Combines broad vector search (recall) with semantic reranking (precision) to surface the most relevant results
- **Query rewriting:** LLM reformulates ambiguous or conversational queries before searching
- **Confidence filtering:** Automatically filters out low-quality matches below a configurable threshold
- **Structured citations:** Returns document metadata (name, relevance score) for source attribution

### Integration with NeMo Agent Toolkit Components

The `nvidia_rag_lib` library integrates with standard NeMo Agent toolkit components. You configure `llms`, `embedders`, and `retrievers` sections as usual. The library references these components by name:

```yaml
function_groups:
  cuda_search:
    _type: nvidia_rag_lib
    llm: nim_llm                # References llms.nim_llm
    embedder: nim_embedder      # References embedders.nim_embedder
    retriever: cuda_retriever   # References retrievers.cuda_retriever
```

This means you can reuse existing NeMo Agent toolkit infrastructure definitions and swap in the RAG library without changing your LLM, embedder, or retriever configurations.

### RAG-Specific Configuration

The library adds configuration specific to the RAG pipeline. These fields differ from a standard NeMo Agent toolkit retriever setup:

| Field | Purpose |
|-------|---------|
| `topic` | Description for agent tool selection |
| `collection_names` | Milvus collections to search |
| `reranker_top_k` | Number of results after reranking |
| `rag_pipeline.enable_citations` | Include document metadata in results |
| `rag_pipeline.default_confidence_threshold` | Filter low-confidence results |
| `rag_pipeline.ranking.enable_reranker` | Enable semantic reranking |
| `rag_pipeline.ranking.model_name` | Reranker model to use |
| `rag_pipeline.query_rewriter.enabled` | Enable LLM query rewriting |

### Example Configuration

```yaml
function_groups:
  cuda_search:
    _type: nvidia_rag_lib
    include:
      - search
    llm: nim_llm
    embedder: nim_embedder
    retriever: cuda_retriever
    topic: NVIDIA CUDA library
    collection_names:
      - cuda_docs
    reranker_top_k: 10
    rag_pipeline:
      enable_citations: true
      default_confidence_threshold: 0.25
      ranking:
        enable_reranker: true
        model_name: nvidia/llama-3.2-nv-rerankqa-1b-v2
      query_rewriter:
        enabled: true
```

### Run the Workflow

```bash
nat run --config_file examples/RAG/simple_rag/configs/rag_library_mode_config.yml \
--input "How do I install CUDA"
```

The logs show the pipeline stages in action:

```console
INFO:nvidia_rag.rag_server.main:Setting top k as: 100.
INFO:nvidia_rag.rag_server.main:Narrowing the collection from 100 results and further narrowing it to 10 with the reranker for search
INFO:nvidia_rag.rag_server.main:Setting ranker top n as: 10.
INFO:nvidia_rag.utils.vdb.milvus.milvus_vdb: Milvus Retrieval latency: 0.8911 seconds
INFO:nvidia_rag.rag_server.main: == Context reranker time: 5631.08 ms ==
INFO:nvidia_rag.utils.common:Confidence threshold filtering: 10 -> 10 documents (threshold: 0.25)
```

The agent decides to search the knowledge base and retrieves grounded document excerpts:

```console
[AGENT]
Agent input: How do I install CUDA
Agent's thoughts:
Thought: To answer the user's question about installing CUDA, I need to provide them with the correct steps and requirements.

Action: cuda_search__search
Action Input: {'query': 'CUDA installation steps'}
```

The search tool returns structured citations in JSON format:

```console
[AGENT]
Calling tools: cuda_search__search
Tool's input: {'query': 'CUDA installation steps'}
Tool's response:
{"total_results":10,"results":[{"document_id":"","content":"Note\nFor both native as well as cross development,
the toolkit must be installed using the distribution-specific installer...
Download the NVIDIA CUDA Toolkit from https://developer.nvidia.com/cuda-downloads.
Choose the platform you are using and download the NVIDIA CUDA Toolkit...
...(truncated)"},...]}
```

The agent synthesizes a comprehensive, grounded response with specific commands for multiple platforms:

```console
['To install CUDA, you can follow these steps:

1. Verify that you have a CUDA-capable GPU.
2. Download the NVIDIA CUDA Toolkit from https://developer.nvidia.com/cuda-downloads.
3. Install the NVIDIA CUDA Toolkit. The installation steps may vary depending on your operating system.
4. Test that the installed software runs correctly and communicates with the hardware.

For example, on Ubuntu, you can install CUDA using the following commands:
# apt update
# apt install cuda-toolkit

On Windows, you can use the network installer or full installer.

Additionally, you can use Conda to install CUDA:
$ conda install cuda -c nvidia

You can also use pip wheels:
$ python3 -m pip install nvidia-cuda-runtime-cu12']
```
67 changes: 67 additions & 0 deletions examples/RAG/simple_rag/configs/rag_library_mode_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# LLM referenced by name from the RAG function group (`llm`) and the agent workflow (`llm_name`).
llms:
  nim_llm:
    _type: nim
    model_name: meta/llama-3.3-70b-instruct
    base_url: https://integrate.api.nvidia.com/v1
    temperature: 0
    max_tokens: 4096
    top_p: 1

# Embedding model referenced by the retriever (`embedding_model`) and the RAG function group (`embedder`).
# The example README instructs re-ingesting `cuda_docs` with this same model.
embedders:
  nim_embedder:
    _type: nim
    model_name: nvidia/llama-3.2-nv-embedqa-1b-v2
    base_url: https://integrate.api.nvidia.com/v1
    truncate: "END"

# Milvus retriever referenced by the RAG function group (`retriever`).
retrievers:
  cuda_retriever:
    _type: milvus_retriever
    uri: http://localhost:19530
    collection_name: cuda_docs
    embedding_model: nim_embedder
    # Size of the initial candidate pool; the function group narrows it via `reranker_top_k`.
    top_k: 100

# RAG library function group. `llm`, `embedder`, and `retriever` refer to entries defined
# under `llms`, `embedders`, and `retrievers` in this file.
function_groups:
  cuda_search:
    _type: nvidia_rag_lib
    # Only the `search` tool is included from this group.
    include:
      - search
    llm: nim_llm
    embedder: nim_embedder
    retriever: cuda_retriever
    topic: NVIDIA CUDA library
    collection_names:
      - cuda_docs
    # Number of results kept after reranking.
    reranker_top_k: 10
    rag_pipeline:
      enable_citations: true
      default_confidence_threshold: 0.25
      ranking:
        enable_reranker: true
        model_name: nvidia/llama-3.2-nv-rerankqa-1b-v2
      query_rewriter:
        enabled: true

# ReAct agent that uses the `cuda_search` function group as its tool set.
workflow:
  _type: react_agent
  tool_names:
    - cuda_search
  verbose: true
  llm_name: nim_llm
1 change: 0 additions & 1 deletion examples/deploy/docker-compose.milvus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,3 @@ services:
networks:
default:
name: nvidia-rag-test

2 changes: 2 additions & 0 deletions packages/nvidia_nat_all/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
"nvidia-nat-opentelemetry",
"nvidia-nat-phoenix",
"nvidia-nat-profiling",
"nvidia-nat-rag-lib",
# nvidia-nat-ragaai cannot be part of all due to conflicts with nvidia-nat-strands
# "nvidia-nat-ragaai",
"nvidia-nat-redis",
Expand Down Expand Up @@ -86,6 +87,7 @@ nvidia-nat-openpipe-art = { workspace = true }
nvidia-nat-opentelemetry = { workspace = true }
nvidia-nat-phoenix = { workspace = true }
nvidia-nat-profiling = { workspace = true }
nvidia-nat-rag-lib = { workspace = true }
nvidia-nat-ragaai = { workspace = true }
nvidia-nat-redis = { workspace = true }
nvidia-nat-s3 = { workspace = true }
Expand Down
1 change: 1 addition & 0 deletions packages/nvidia_nat_rag_lib/LICENSE-3rd-party.txt
1 change: 1 addition & 0 deletions packages/nvidia_nat_rag_lib/LICENSE.md
56 changes: 56 additions & 0 deletions packages/nvidia_nat_rag_lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Build requirements; setuptools-scm is needed because `version` is declared dynamic in [project].
[build-system]
requires = ["setuptools >= 64", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta"


# Package discovery: look under `src/` and pick up only packages matching `nat.*`.
[tool.setuptools.packages.find]
include = ["nat.*"]
where = ["src"]


# Version is derived from git metadata; `root` points two directories up from this file
# (this package lives at packages/nvidia_nat_rag_lib/).
[tool.setuptools_scm]
root = "../.."
git_describe_command = "git describe --long --first-parent"


[project]
name = "nvidia-nat-rag-lib"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I dislike this package name. Can we ask the RAG blueprint team if there is a better alternative. Like nvidia-nat-foundation-rag, nvidia-nat-rag-bp, or something more specific? Adding _lib in the name is redundant. Everything is a library.

dynamic = ["version"]
dependencies = [
  # Keep package version constraints as open as possible to avoid conflicts with other packages. Always define a minimum
  # version when adding a new package. If unsure, default to using `~=` instead of `==`. Does not apply to nvidia-nat packages.
  # Keep sorted!!!
  "nvidia-nat~=1.5",
  # The lower bound names the pre-release explicitly: under PEP 440 ordering, 2.4.0.dev0 (the vendored wheel
  # referenced in [tool.uv.sources]) sorts before 2.4.0 and does not satisfy a plain `>=2.4.0`.
  # TODO: Update version constraint when nvidia-rag is published to PyPI
  "nvidia-rag>=2.4.0.dev0",
]
requires-python = ">=3.11,<3.14"
description = "Subpackage for NVIDIA RAG library in NeMo Agent toolkit"
readme = "src/nat/meta/pypi.md"
keywords = ["ai", "rag", "agents", "retrieval"]
license = { text = "Apache-2.0" }
authors = [{ name = "NVIDIA Corporation" }]
maintainers = [{ name = "NVIDIA Corporation" }]
# Keep in sync with `requires-python` above (3.11 through 3.13).
classifiers = [
  "Programming Language :: Python",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
]

# Project links published with the package metadata.
[project.urls]
source = "https://github.com/NVIDIA/NeMo-Agent-Toolkit"
documentation = "https://docs.nvidia.com/nemo/agent-toolkit/latest/"


# uv project settings; `editable_mode = "compat"` is passed to the build backend as a config setting.
[tool.uv]
managed = true
config-settings.editable_mode = "compat"


# Where uv resolves the dependencies listed in [project]: `nvidia-nat` from the workspace,
# `nvidia-rag` from a wheel vendored next to this file.
[tool.uv.sources]
nvidia-nat = { workspace = true }
# TODO: Remove when nvidia-rag>=2.4.0 is on PyPI
nvidia-rag = { path = "vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl" }


# Entry point in the `nat.components` group pointing at this package's registration module.
[project.entry-points."nat.components"]
nat_rag_lib = "nat.plugins.rag_lib.register"
Loading
Loading