diff --git a/packages/nvidia_nat_rag/LICENSE.md b/packages/nvidia_nat_rag/LICENSE.md
new file mode 100644
index 0000000000..260cc77d47
--- /dev/null
+++ b/packages/nvidia_nat_rag/LICENSE.md
@@ -0,0 +1,190 @@
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction,
+and distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by
+the copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all
+other entities that control, are controlled by, or are under common
+control with that entity. For the purposes of this definition,
+"control" means (i) the power, direct or indirect, to cause the
+direction or management of such entity, whether by contract or
+otherwise, or (ii) ownership of fifty percent (50%) or more of the
+outstanding shares, or (iii) beneficial ownership of such entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity
+exercising permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation
+source, and configuration files.
+
+"Object" form shall mean any form resulting from mechanical
+transformation or translation of a Source form, including but
+not limited to compiled object code, generated documentation,
+and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or
+Object form, made available under the License, as indicated by a
+copyright notice that is included in or attached to the work
+(an example is provided in the Appendix below).
+
+"Derivative Works" shall mean any work, whether in Source or Object
+form, that is based on (or derived from) the Work and for which the
+editorial revisions, annotations, elaborations, or other modifications
+represent, as a whole, an original work of authorship. For the purposes
+of this License, Derivative Works shall not include works that remain
+separable from, or merely link (or bind by name) to the interfaces of,
+the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including
+the original version of the Work and any modifications or additions
+to that Work or Derivative Works thereof, that is intentionally
+submitted to the Licensor for inclusion in the Work by the copyright owner
+or by an individual or Legal Entity authorized to submit on behalf of
+the copyright owner. For the purposes of this definition, "submitted"
+means any form of electronic, verbal, or written communication sent
+to the Licensor or its representatives, including but not limited to
+communication on electronic mailing lists, source code control systems,
+and issue tracking systems that are managed by, or on behalf of, the
+Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise
+designated in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity
+on behalf of whom a Contribution has been received by Licensor and
+subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+copyright license to reproduce, prepare Derivative Works of,
+publicly display, publicly perform, sublicense, and distribute the
+Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+this License, each Contributor hereby grants to You a perpetual,
+worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+(except as stated in this section) patent license to make, have made,
+use, offer to sell, sell, import, and otherwise transfer the Work,
+where such license applies only to those patent claims licensable
+by such Contributor that are necessarily infringed by their
+Contribution(s) alone or by combination of their Contribution(s)
+with the Work to which such Contribution(s) was submitted. If You
+institute patent litigation against any entity (including a
+cross-claim or counterclaim in a lawsuit) alleging that the Work
+or a Contribution incorporated within the Work constitutes direct
+or contributory patent infringement, then any patent licenses
+granted to You under this License for that Work shall terminate
+as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+Work or Derivative Works thereof in any medium, with or without
+modifications, and in Source or Object form, provided that You
+meet the following conditions:
+
+(a) You must give any other recipients of the Work or
+    Derivative Works a copy of this License; and
+
+(b) You must cause any modified files to carry prominent notices
+    stating that You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works
+    that You distribute, all copyright, patent, trademark, and
+    attribution notices from the Source form of the Work,
+    excluding those notices that do not pertain to any part of
+    the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its
+    distribution, then any Derivative Works that You distribute must
+    include a readable copy of the attribution notices contained
+    within such NOTICE file, excluding those notices that do not
+    pertain to any part of the Derivative Works, in at least one
+    of the following places: within a NOTICE text file distributed
+    as part of the Derivative Works; within the Source form or
+    documentation, if provided along with the Derivative Works; or,
+    within a display generated by the Derivative Works, if and
+    wherever such third-party notices normally appear. The contents
+    of the NOTICE file are for informational purposes only and
+    do not modify the License. You may add Your own attribution
+    notices within Derivative Works that You distribute, alongside
+    or as an addendum to the NOTICE text from the Work, provided
+    that such additional attribution notices cannot be construed
+    as modifying the License.
+
+You may add Your own copyright statement to Your modifications and
+may provide additional or different license terms and conditions
+for use, reproduction, or distribution of Your modifications, or
+for any such Derivative Works as a whole, provided Your use,
+reproduction, and distribution of the Work otherwise complies with
+the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+any Contribution intentionally submitted for inclusion in the Work
+by You to the Licensor shall be under the terms and conditions of
+this License, without any additional terms or conditions.
+Notwithstanding the above, nothing herein shall supersede or modify
+the terms of any separate license agreement you may have executed
+with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+names, trademarks, service marks, or product names of the Licensor,
+except as required for reasonable and customary use in describing the
+origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+agreed to in writing, Licensor provides the Work (and each
+Contributor provides its Contributions) on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+implied, including, without limitation, any warranties or conditions
+of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any
+risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+whether in tort (including negligence), contract, or otherwise,
+unless required by applicable law (such as deliberate and grossly
+negligent acts) or agreed to in writing, shall any Contributor be
+liable to You for damages, including any direct, indirect, special,
+incidental, or consequential damages of any character arising as a
+result of this License or out of the use or inability to use the
+Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all
+other commercial damages or losses), even if such Contributor
+has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+the Work or Derivative Works thereof, You may choose to offer,
+and charge a fee for, acceptance of support, warranty, indemnity,
+or other liability obligations and/or rights consistent with this
+License. However, in accepting such obligations, You may act only
+on Your own behalf and on Your sole responsibility, not on behalf
+of any other Contributor, and only if You agree to indemnify,
+defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason
+of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+Copyright 2025-2026 NVIDIA CORPORATION & AFFILIATES
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/packages/nvidia_nat_rag/README.md b/packages/nvidia_nat_rag/README.md
new file mode 100644
index 0000000000..dc27f7ef29
--- /dev/null
+++ b/packages/nvidia_nat_rag/README.md
@@ -0,0 +1,200 @@
+# NVIDIA NAT RAG Plugin
+
+This plugin integrates [NVIDIA RAG](https://github.com/NVIDIA-AI-Blueprints/rag) with NeMo Agent Toolkit, providing RAG query and search capabilities for your agent workflows.
+
+## Prerequisites
+
+- Python 3.11+
+- NeMo Agent Toolkit installed
+- Access to NVIDIA AI endpoints (API key required)
+- Milvus vector database running (default: `localhost:19530`)
+
+## Installation
+
+### 1. Install the Plugin
+
+From the NeMo Agent Toolkit root directory:
+
+```bash
+# Activate your virtual environment
+source .venv/bin/activate
+
+# Install the plugin in editable mode
+uv pip install -e packages/nvidia_nat_rag
+```
+
+### 2. Set Environment Variables
+
+```bash
+# Required: NVIDIA API key for embeddings, reranking, and LLM
+export NVIDIA_API_KEY="your-nvidia-api-key"
+
+# Optional: If using custom endpoints
+# export NVIDIA_BASE_URL="https://integrate.api.nvidia.com/v1"
+```
+
+### 3. Start Milvus (Vector Database)
+
+The plugin requires a Milvus instance. For development you can use Milvus Lite; otherwise, start a full Milvus instance with Docker:
+
+```bash
+# Using Milvus Lite (for development)
+# The plugin will automatically use milvus-lite if installed
+
+# Or start a full Milvus instance with Docker
+docker run -d --name milvus \
+  -p 19530:19530 \
+  -p 9091:9091 \
+  milvusdb/milvus:latest
+```
+
+## Configuration
+
+### Sample Config File
+
+The plugin includes a sample configuration at:
+```
+packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml
+```
+
+### Available Functions
+
+#### `nvidia_rag_query`
+Queries documents using NVIDIA RAG and returns an AI-generated response.
+
+```yaml
+functions:
+  rag_query:
+    _type: nvidia_rag_query
+    config_file: config.yaml          # Path to nvidia_rag config
+    collection_names: ["my_docs"]     # Milvus collection names
+    vdb_endpoint: "http://localhost:19530"
+    use_knowledge_base: true
+    # embedding_endpoint: "localhost:9080"  # Optional: for on-prem embeddings
+```
+
+#### `nvidia_rag_search`
+Searches for relevant document chunks without generating a response.
+
+```yaml
+functions:
+  rag_search:
+    _type: nvidia_rag_search
+    config_file: config.yaml
+    collection_names: ["my_docs"]
+    vdb_endpoint: "http://localhost:19530"
+    reranker_top_k: 3                 # Number of results after reranking
+    vdb_top_k: 20                     # Number of results from vector search
+```
+
+## Usage
+
+### Running a RAG Workflow
+
+```bash
+nat run \
+  --config_file packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml \
+  --input "What is the price of a hammer?"
+```
+
+### Example Workflow Config
+
+```yaml
+functions:
+  rag_query:
+    _type: nvidia_rag_query
+    config_file: config.yaml
+    collection_names: ["product_catalog"]
+    vdb_endpoint: "http://localhost:19530"
+    use_knowledge_base: true
+
+  rag_search:
+    _type: nvidia_rag_search
+    config_file: config.yaml
+    collection_names: ["product_catalog"]
+    vdb_endpoint: "http://localhost:19530"
+    reranker_top_k: 3
+    vdb_top_k: 20
+
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.1-70b-instruct
+    temperature: 0.0
+
+workflow:
+  _type: react_agent
+  tool_names:
+    - rag_query
+    - rag_search
+    - current_datetime
+  llm_name: nim_llm
+  verbose: true
+```
+
+## Troubleshooting
+
+### Error: Function type `nvidia_rag_query` not found
+
+The plugin is not installed. Run:
+```bash
+uv pip install -e packages/nvidia_nat_rag
+```
+
+### Error: Token limit exceeded
+
+If you get a token limit error, reduce the number of results returned:
+```yaml
+rag_search:
+  _type: nvidia_rag_search
+  reranker_top_k: 1    # Reduce from 3
+  vdb_top_k: 10        # Reduce from 20
+```
+
+This often happens when documents contain large base64-encoded images (charts, figures).
+
+### Error: Connection refused to Milvus
+
+Ensure Milvus is running:
+```bash
+# Check if Milvus is running
+docker ps | grep milvus
+
+# Start Milvus if not running
+docker start milvus
+```
+
+### Error: NVIDIA API key not set
+
+```bash
+export NVIDIA_API_KEY="your-api-key"
+```
+
+## Directory Structure
+
+```
+packages/nvidia_nat_rag/
+├── LICENSE.md
+├── README.md                 # This file
+├── pyproject.toml           # Package configuration
+├── src/
+│   └── nat/
+│       ├── meta/
+│       │   └── pypi.md
+│       └── plugins/
+│           └── rag/
+│               ├── __init__.py
+│               ├── configs/
+│               │   └── config.yml    # Sample config
+│               ├── rag_functions.py  # RAG function implementations
+│               └── register.py       # Plugin registration
+└── vendor/
+    └── nvidia_rag-2.4.0.dev0-py3-none-any.whl  # Vendored dependency
+```
+
+## License
+
+Apache-2.0
diff --git a/packages/nvidia_nat_rag/pyproject.toml b/packages/nvidia_nat_rag/pyproject.toml
new file mode 100644
index 0000000000..a54acba8a7
--- /dev/null
+++ b/packages/nvidia_nat_rag/pyproject.toml
@@ -0,0 +1,58 @@
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = ["setuptools >= 64", "setuptools-scm>=8"]
+
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["nat.*"]
+
+
+[tool.setuptools_scm]
+git_describe_command = "git describe --long --first-parent"
+root = "../.."
+
+
+[project]
+name = "nvidia-nat-rag"
+dynamic = ["version"]
+dependencies = [
+  # Keep package version constraints as open as possible to avoid conflicts with other packages. Always define a minimum
+  # version when adding a new package. If unsure, default to using `~=` instead of `==`. Does not apply to nvidia-nat packages.
+  # Keep sorted!!!
+  "langgraph>=0.2",  # Required for react_agent workflow
+  "langchain_classic",
+  "nvidia-nat~=1.5",
+  "nvidia-rag[rag]~=2.4",
+]
+requires-python = ">=3.11,<3.14"
+description = "Subpackage for NVIDIA RAG library integration in NeMo Agent toolkit"
+readme = "src/nat/meta/pypi.md"
+keywords = ["ai", "rag", "agents"]
+license = { text = "Apache-2.0" }
+authors = [{ name = "NVIDIA Corporation" }]
+maintainers = [{ name = "NVIDIA Corporation" }]
+classifiers = [
+  "Programming Language :: Python",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+
+[project.urls]
+documentation = "https://docs.nvidia.com/nemo/agent-toolkit/latest/"
+source = "https://github.com/NVIDIA/NeMo-Agent-Toolkit"
+
+
+[tool.uv]
+managed = true
+config-settings = { editable_mode = "compat" }
+
+
+[tool.uv.sources]
+nvidia-nat = { workspace = true }
+nvidia-rag = { path = "vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl" }
+
+
+[project.entry-points.'nat.components']
+nat_rag = "nat.plugins.rag.register"
diff --git a/packages/nvidia_nat_rag/src/nat/meta/pypi.md b/packages/nvidia_nat_rag/src/nat/meta/pypi.md
new file mode 100644
index 0000000000..ab7f4b2682
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/meta/pypi.md
@@ -0,0 +1,35 @@
+# NVIDIA NeMo Agent Toolkit - RAG Plugin
+
+This package provides integration between NVIDIA NeMo Agent Toolkit and the NVIDIA RAG library.
+
+## Features
+
+- **RAG Query**: Query documents using RAG with configurable LLM and embeddings
+- **RAG Search**: Search for relevant documents in vector database collections
+
+## Installation
+
+```bash
+pip install nvidia-nat-rag
+```
+
+## Usage
+
+Add the RAG tools to your NAT workflow configuration:
+
+```yaml
+functions:
+  rag_query:
+    _type: nvidia_rag_query
+    collection_names: ["my_collection"]
+    vdb_endpoint: "http://localhost:19530"
+
+workflow:
+  _type: react_agent
+  tool_names: [rag_query]
+  llm_name: nim_llm
+```
+
+## Documentation
+
+For more information, see the [NeMo Agent Toolkit documentation](https://docs.nvidia.com/nemo/agent-toolkit/latest/).
diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py
new file mode 100644
index 0000000000..1312ed324b
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""NVIDIA RAG integration for NeMo Agent Toolkit."""
diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml b/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml
new file mode 100644
index 0000000000..814f8b0ef9
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/configs/config.yml
@@ -0,0 +1,45 @@
+# NVIDIA RAG Integration - Sample Configuration
+# This configuration demonstrates how to use NVIDIA RAG tools with NeMo Agent Toolkit
+
+# Each RAG function below takes a `config_file` path that is passed to the nvidia_rag library.
+# Make sure this file exists and is properly configured for your environment.
+# See: https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/config.yaml
+
+functions:
+  # RAG Query Tool - Query documents and get AI-generated responses
+  rag_query:
+    _type: nvidia_rag_query
+    config_file: config.yaml  # Path to nvidia_rag config file
+    collection_names: ["test_library"]
+    vdb_endpoint: "http://localhost:19530"
+    use_knowledge_base: true
+    # embedding_endpoint: "localhost:9080"  # Uncomment for on-prem embeddings
+
+  # RAG Search Tool - Search for relevant document chunks
+  rag_search:
+    _type: nvidia_rag_search
+    config_file: config.yaml
+    collection_names: ["test_library"]
+    vdb_endpoint: "http://localhost:19530"
+    reranker_top_k: 3  # Reduced to avoid token limit issues with large documents
+    vdb_top_k: 20
+
+  # Utility tool
+  current_datetime:
+    _type: current_datetime
+
+llms:
+  nim_llm:
+    _type: nim
+    model_name: meta/llama-3.1-70b-instruct
+    temperature: 0.0
+
+workflow:
+  _type: react_agent
+  tool_names:
+    - rag_query
+    - rag_search
+    - current_datetime
+  llm_name: nim_llm
+  verbose: true
+  parse_agent_response_max_retries: 3
diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py
new file mode 100644
index 0000000000..e9109c0c83
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/rag_functions.py
@@ -0,0 +1,213 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""NVIDIA RAG query and search functions for NeMo Agent Toolkit."""
+
+import json
+import logging
+from typing import Optional
+
+from pydantic import Field
+
+from nat.builder.builder import Builder
+from nat.builder.function_info import FunctionInfo
+from nat.cli.register_workflow import register_function
+from nat.data_models.function import FunctionBaseConfig
+
+logger = logging.getLogger(__name__)
+
+
+class NvidiaRAGQueryConfig(FunctionBaseConfig, name="nvidia_rag_query"):
+    """
+    Tool that queries documents using NVIDIA RAG.
+    Sends a chat-style query to the RAG system using configured models and endpoints.
+    """
+
+    config_file: str = Field(
+        default="config.yaml",
+        description="Path to the NVIDIA RAG configuration YAML file.",
+    )
+    collection_names: list[str] = Field(
+        default_factory=list,
+        description="List of collection names to query from.",
+    )
+    vdb_endpoint: str = Field(
+        default="http://localhost:19530",
+        description="Vector database endpoint URL.",
+    )
+    embedding_endpoint: Optional[str] = Field(
+        default=None,
+        description="Embedding endpoint URL. If None, uses cloud embeddings.",
+    )
+    use_knowledge_base: bool = Field(
+        default=True,
+        description="Whether to use the knowledge base for RAG.",
+    )
+
+
+@register_function(config_type=NvidiaRAGQueryConfig)
+async def nvidia_rag_query(config: NvidiaRAGQueryConfig, builder: Builder):
+    """Register the NVIDIA RAG query tool."""
+
+    from nvidia_rag import NvidiaRAG
+    from nvidia_rag.utils.configuration import NvidiaRAGConfig
+
+    # Initialize the RAG client
+    rag_config = NvidiaRAGConfig.from_yaml(config.config_file)
+    rag = NvidiaRAG(config=rag_config)
+
+    async def _nvidia_rag_query(query: str) -> str:
+        """Query documents using NVIDIA RAG and return a generated response.
+
+        This tool sends a query to the RAG system which retrieves relevant documents
+        from the vector database and uses an LLM to generate a response.
+
+        Args:
+            query: The question or query to ask the RAG system.
+
+        Returns:
+            The generated response from the RAG system based on retrieved documents.
+        """
+        try:
+            response = await rag.generate(
+                messages=[{"role": "user", "content": query}],
+                use_knowledge_base=config.use_knowledge_base,
+                collection_names=config.collection_names,
+                embedding_endpoint=config.embedding_endpoint,
+            )
+
+            if response.status_code != 200:
+                return f"Error: RAG query failed with status code {response.status_code}"
+
+            # Extract the response content from the streaming generator
+            full_response = []
+            async for chunk in response.generator:
+                if chunk.startswith("data: "):
+                    chunk = chunk[len("data: "):].strip()
+                if not chunk:
+                    continue
+                try:
+                    data = json.loads(chunk)
+                    choices = data.get("choices", [])
+                    if choices:
+                        delta = choices[0].get("delta", {})
+                        text = delta.get("content")
+                        if not text:
+                            message = choices[0].get("message", {})
+                            text = message.get("content", "")
+                        if text:
+                            full_response.append(text)
+                except json.JSONDecodeError:
+                    continue
+
+            return "".join(full_response) if full_response else "No response generated."
+
+        except Exception as e:
+            logger.error("RAG query failed: %s", str(e))
+            return f"Error querying RAG: {str(e)}"
+
+    yield FunctionInfo.from_fn(
+        _nvidia_rag_query,
+        description=_nvidia_rag_query.__doc__,
+    )
+
+
+class NvidiaRAGSearchConfig(FunctionBaseConfig, name="nvidia_rag_search"):
+    """
+    Tool that searches for relevant documents in the vector database using NVIDIA RAG.
+    """
+
+    config_file: str = Field(
+        default="config.yaml",
+        description="Path to the NVIDIA RAG configuration YAML file.",
+    )
+    collection_names: list[str] = Field(
+        default_factory=list,
+        description="List of collection names to search in.",
+    )
+    vdb_endpoint: str = Field(
+        default="http://localhost:19530",
+        description="Vector database endpoint URL.",
+    )
+    embedding_endpoint: Optional[str] = Field(
+        default=None,
+        description="Embedding endpoint URL. If None, uses cloud embeddings.",
+    )
+    reranker_top_k: int = Field(
+        default=10,
+        description="Number of top results to return after reranking.",
+    )
+    vdb_top_k: int = Field(
+        default=100,
+        description="Number of top results to retrieve from vector database before reranking.",
+    )
+
+
+@register_function(config_type=NvidiaRAGSearchConfig)
+async def nvidia_rag_search(config: NvidiaRAGSearchConfig, builder: Builder):
+    """Register the NVIDIA RAG search tool."""
+
+    from nvidia_rag import NvidiaRAG
+    from nvidia_rag.utils.configuration import NvidiaRAGConfig
+
+    # Initialize the RAG client
+    rag_config = NvidiaRAGConfig.from_yaml(config.config_file)
+    rag = NvidiaRAG(config=rag_config)
+
+    async def _nvidia_rag_search(query: str) -> str:
+        """Search for relevant documents in the vector database.
+
+        This tool performs a semantic search in the vector database collections
+        and returns relevant document chunks.
+
+        Args:
+            query: The search query to find relevant documents.
+
+        Returns:
+            A formatted string containing the search results with document names and content.
+        """
+        try:
+            citations = await rag.search(
+                query=query,
+                collection_names=config.collection_names,
+                reranker_top_k=config.reranker_top_k,
+                vdb_top_k=config.vdb_top_k,
+                embedding_endpoint=config.embedding_endpoint,
+            )
+
+            if not citations or not hasattr(citations, "results") or not citations.results:
+                return "No documents found for the given query."
+
+            # Format the results
+            results = []
+            for idx, citation in enumerate(citations.results):
+                doc_name = getattr(citation, "document_name", f"Document {idx + 1}")
+                content = getattr(citation, "content", "")
+                doc_type = getattr(citation, "document_type", "text")
+                description = getattr(citation, "metadata", {}).description
+                print(description)
+
+                results.append(f"**{doc_name}** (type: {doc_type}):\n{description}")
+
+            return "\n\n---\n\n".join(results)
+
+        except Exception as e:
+            logger.error("RAG search failed: %s", str(e))
+            return f"Error searching documents: {str(e)}"
+
+    yield FunctionInfo.from_fn(
+        _nvidia_rag_search,
+        description=_nvidia_rag_search.__doc__,
+    )
diff --git a/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py b/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py
new file mode 100644
index 0000000000..ccbd87e44d
--- /dev/null
+++ b/packages/nvidia_nat_rag/src/nat/plugins/rag/register.py
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# flake8: noqa
+
+# Import all functions to register them with NAT
+from nat.plugins.rag import rag_functions
diff --git a/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl b/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl
new file mode 100644
index 0000000000..e2187d675c
Binary files /dev/null and b/packages/nvidia_nat_rag/vendor/nvidia_rag-2.4.0.dev0-py3-none-any.whl differ