diff --git a/.gitattributes b/.gitattributes index c8d189184..0a7e469ce 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,4 @@ data/dataset.zip filter=lfs diff=lfs merge=lfs -text data/ filter=lfs diff=lfs merge=lfs -text +examples/rag_event_ingest/data/**/*.mp4 filter=lfs diff=lfs merge=lfs -text +examples/rag_event_ingest/data/**/*.pdf filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/publish-artifacts.yml b/.github/workflows/publish-artifacts.yml index 2be3979e1..7cb97dbe2 100644 --- a/.github/workflows/publish-artifacts.yml +++ b/.github/workflows/publish-artifacts.yml @@ -7,6 +7,16 @@ on: - cron: '30 18 * * *' workflow_dispatch: inputs: + JOBS_TO_RUN: + description: 'Jobs to run (manual trigger only)' + required: true + default: 'all' + type: choice + options: + - all + - wheel-only + - containers-only + - helm-chart-only CONTAINER_TAG: description: 'Custom tag for containers (optional)' required: false @@ -15,6 +25,26 @@ on: description: 'Artifactory version (optional, defaults to auto-generated from get_version.sh)' required: false default: '' + HELM_CHART_VERSION: + description: 'Helm chart version for NGC (optional, defaults to auto-generated from get_version.sh)' + required: false + default: '' + # Container-level selection (applies when JOBS_TO_RUN is 'all' or 'containers-only') + PUBLISH_RAG_SERVER: + description: 'Publish rag-server container' + required: false + default: true + type: boolean + PUBLISH_INGESTOR_SERVER: + description: 'Publish ingestor-server container' + required: false + default: true + type: boolean + PUBLISH_RAG_FRONTEND: + description: 'Publish rag-frontend container' + required: false + default: true + type: boolean env: RELEASE_TYPE: dev @@ -26,6 +56,7 @@ jobs: publish-wheel: name: Build and Publish Python Wheel runs-on: ubuntu-latest + if: github.event_name != 'workflow_dispatch' || github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'wheel-only' container: image: python:3.10 
steps: @@ -106,6 +137,7 @@ jobs: publish-rag-server: name: Build and Publish RAG Server Container runs-on: ubuntu-latest + if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_RAG_SERVER != 'false') steps: - name: Checkout code uses: actions/checkout@v4 @@ -147,7 +179,7 @@ jobs: # Tag and push to NGC Container Registry echo "Pushing rag-server to NGC Container Registry..." docker push nvcr.io/nvstaging/blueprint/rag-server:$TAG - docker tag nvcr.io/nvstaging/blueprint/rag-server:$TAG nvcr.io/nvstaging/blueprint/rag-server:latest + docker tag nvcr.io/nvidia/blueprint/rag-server:$TAG nvcr.io/nvstaging/blueprint/rag-server:latest docker push nvcr.io/nvstaging/blueprint/rag-server:latest echo "RAG server container publishing completed successfully" @@ -164,6 +196,7 @@ jobs: publish-ingestor-server: name: Build and Publish Ingestor Server Container runs-on: ubuntu-latest + if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_INGESTOR_SERVER != 'false') steps: - name: Checkout code uses: actions/checkout@v4 @@ -205,7 +238,7 @@ jobs: # Tag and push to NGC Container Registry echo "Pushing ingestor-server to NGC Container Registry..." 
docker push nvcr.io/nvstaging/blueprint/ingestor-server:$TAG - docker tag nvcr.io/nvstaging/blueprint/ingestor-server:$TAG nvcr.io/nvstaging/blueprint/ingestor-server:latest + docker tag nvcr.io/nvidia/blueprint/ingestor-server:$TAG nvcr.io/nvstaging/blueprint/ingestor-server:latest docker push nvcr.io/nvstaging/blueprint/ingestor-server:latest echo "Ingestor server container publishing completed successfully" @@ -222,6 +255,7 @@ jobs: publish-rag-frontend: name: Build and Publish RAG Frontend Container runs-on: ubuntu-latest + if: github.event_name != 'workflow_dispatch' || ((github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'containers-only') && github.event.inputs.PUBLISH_RAG_FRONTEND != 'false') steps: - name: Checkout code uses: actions/checkout@v4 @@ -263,7 +297,7 @@ jobs: # Tag and push to NGC Container Registry echo "Pushing rag-frontend to NGC Container Registry..." docker push nvcr.io/nvstaging/blueprint/rag-frontend:$TAG - docker tag nvcr.io/nvstaging/blueprint/rag-frontend:$TAG nvcr.io/nvstaging/blueprint/rag-frontend:latest + docker tag nvcr.io/nvidia/blueprint/rag-frontend:$TAG nvcr.io/nvstaging/blueprint/rag-frontend:latest docker push nvcr.io/nvstaging/blueprint/rag-frontend:latest echo "RAG frontend container publishing completed successfully" @@ -274,3 +308,86 @@ jobs: docker images | grep "rag-frontend" | awk '{print $3}' | xargs -r docker rmi -f || echo "No rag-frontend images to delete" docker system prune -f || true + # ============================================================================ + # PUBLISH HELM CHART TO NGC + # ============================================================================ + publish-helm-chart: + name: Build and Publish Helm Chart to NGC + runs-on: ubuntu-latest + if: github.event_name != 'workflow_dispatch' || github.event.inputs.JOBS_TO_RUN == 'all' || github.event.inputs.JOBS_TO_RUN == 'helm-chart-only' + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: 
Install Helm + uses: azure/setup-helm@v4 + with: + version: 'v3.17.0' + + - name: Install NGC CLI + env: + NGC_API_KEY: ${{ secrets.CI_NVSTAGING_BLUEPRINT_KEY }} + run: | + echo "Installing NGC CLI..." + wget --content-disposition https://api.ngc.nvidia.com/v2/resources/nvidia/ngc-apps/ngc_cli/versions/4.9.10/files/ngccli_linux.zip -O ngccli_linux.zip + unzip -o ngccli_linux.zip + chmod u+x ngc-cli/ngc + echo "$(pwd)/ngc-cli" >> $GITHUB_PATH + echo "NGC CLI installed successfully" + + - name: Determine Helm chart version + id: helm_version + env: + # Pass the manual input through the environment so its text is never spliced into the script (prevents shell injection) + HELM_CHART_VERSION_INPUT: ${{ github.event.inputs.HELM_CHART_VERSION }} + run: | + if [ -n "$HELM_CHART_VERSION_INPUT" ]; then + echo "Using custom Helm chart version: $HELM_CHART_VERSION_INPUT" + VERSION="$HELM_CHART_VERSION_INPUT" + else + echo "Using auto-generated version from get_version.sh" + chmod +x ./ci/get_version.sh + VERSION=$(./ci/get_version.sh) + echo "Generated version: $VERSION" + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "HELM_CHART_VERSION=$VERSION" >> $GITHUB_ENV + + - name: Add Helm repositories + env: + NGC_API_KEY: ${{ secrets.CI_NVSTAGING_BLUEPRINT_KEY }} + run: | + cd deploy/helm + helm repo add nvidia-nim https://helm.ngc.nvidia.com/nim/nvidia/ --username='$oauthtoken' --password="$NGC_API_KEY" + helm repo add nim https://helm.ngc.nvidia.com/nim/ --username='$oauthtoken' --password="$NGC_API_KEY" + helm repo add nemo-microservices https://helm.ngc.nvidia.com/nvidia/nemo-microservices --username='$oauthtoken' --password="$NGC_API_KEY" + helm repo add baidu-nim https://helm.ngc.nvidia.com/nim/baidu --username='$oauthtoken' --password="$NGC_API_KEY" + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo add elastic https://helm.elastic.co + helm repo add otel https://open-telemetry.github.io/opentelemetry-helm-charts + helm repo add zipkin https://zipkin.io/zipkin-helm + helm repo add prometheus https://prometheus-community.github.io/helm-charts + helm repo update + + - name: Package Helm 
chart + env: + NGC_API_KEY: ${{ secrets.CI_NVSTAGING_BLUEPRINT_KEY }} + run: | + cd deploy/helm + helm dependency update nvidia-blueprint-rag + helm package nvidia-blueprint-rag/ --version "$HELM_CHART_VERSION" + CHART_TGZ=$(ls nvidia-blueprint-rag-*.tgz) + echo "Created: $CHART_TGZ" + + - name: Push Helm chart to NGC + env: + NGC_API_KEY: ${{ secrets.CI_NVSTAGING_BLUEPRINT_KEY }} + run: | + cd deploy/helm + CHART_TGZ="nvidia-blueprint-rag-$HELM_CHART_VERSION.tgz" + TARGET="nvstaging/blueprint/nvidia-blueprint-rag:$HELM_CHART_VERSION" + # Remove existing version to overwrite (ignore error if version does not exist) + ngc registry chart remove "$TARGET" --org nvstaging -y 2>/dev/null || true + ngc registry chart push "$TARGET" --source "$CHART_TGZ" --org nvstaging + echo "Helm chart published to NGC: $TARGET" + diff --git a/.gitignore b/.gitignore index 9dded62bf..3611412e3 100644 --- a/.gitignore +++ b/.gitignore @@ -80,4 +80,9 @@ coverage/ cover/ *.log tests/data/ +# Agent skills (installed via npx skills add) +/.agents/ +/.claude/ +skills-lock.json + # Workbench Project Layout \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..183677906 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,86 @@ +# NVIDIA RAG Blueprint + +Reference implementation for a Retrieval Augmented Generation pipeline. Python 3.11+ backend (FastAPI + LangChain), React/TypeScript frontend, deployable via Docker Compose or Helm. 
+ +## Project structure + +``` +src/nvidia_rag/ +├── rag_server/ # RAG query/response server (FastAPI) +├── ingestor_server/ # Document ingestion server (FastAPI) +└── utils/ # Shared utilities +frontend/ # React + TypeScript UI (pnpm) +deploy/ +├── compose/ # Docker Compose files and env configs +└── helm/ # Helm charts (standard + MIG-slicing) +docs/ # User-facing documentation (Sphinx, RST/MD) +tests/ +├── unit/ # No network calls allowed +└── integration/ # Network calls permitted +notebooks/ # Jupyter notebooks for evaluation and examples +``` + +## Development commands + +### Backend (Python) + +```bash +uv sync # Install all deps +uv run pytest tests/unit/ # Unit tests +uv run pytest tests/integration/ # Integration tests +ruff check --fix src/ # Lint + autofix +ruff format src/ # Format +pre-commit run --all-files # Run all pre-commit hooks +``` + +### Frontend (TypeScript) + +```bash +cd frontend +pnpm install +pnpm run dev # Dev server +pnpm run lint # ESLint +pnpm exec tsc --noEmit # Type check +pnpm run test:run # Tests +``` + +## Code conventions + +- **Python**: Ruff for linting and formatting (line-length 88, double quotes, space indent). Config in `pyproject.toml`. +- **Type hints**: Required on all function signatures. +- **Imports**: Sorted by isort via Ruff. No in-function imports. +- **Tests**: Mirror source tree (`src/nvidia_rag/rag_server/server.py` → `tests/unit/rag_server/test_server.py`). +- **Frontend**: ESLint + TypeScript strict mode. Function components with hooks. +- **Env files**: `deploy/compose/nvdev.env` (NVIDIA-hosted NIMs) and `deploy/compose/.env` (self-hosted). These are the source of truth for Docker deployments — shell-only exports are lost on restart. + +## Deployment modes + +1. **Docker Compose** — `deploy/compose/` with env-file configs. Multiple profiles: standard, retrieval-only, NVIDIA-hosted. +2. **Helm** — `deploy/helm/nvidia-blueprint-rag/` chart with `values.yaml`. 
Supports MIG GPU slicing via `deploy/helm/mig-slicing/`. +3. **Library** — Import `nvidia_rag` as a Python package for custom pipelines. + +## Key files + +- `pyproject.toml` — All Python deps, ruff config, project metadata +- `deploy/compose/nvdev.env` — Default env file for NVIDIA API Catalog deployments +- `src/nvidia_rag/rag_server/prompt.yaml` — System prompt templates +- `docs/support-matrix.md` — GPU requirements per deployment mode +- `docs/service-port-gpu-reference.md` — Port mappings and GPU assignments + +## PR and commit guidelines + +- Target the `develop` branch, never `main`. +- All commits must be signed off (DCO). +- Run `pre-commit run --all-files` before submitting. +- See `CONTRIBUTING.md` for full workflow. + +## Operations — `rag-blueprint` skill + +For any operational task — deploying, configuring, troubleshooting, or shutting down the RAG Blueprint — read and follow the skill at `.agents/skills/rag-blueprint/SKILL.md`. + +The skill handles: + +- **Deploy** — Docker Compose (standard, retrieval-only, NVIDIA-hosted), Helm, MIG-slicing, library mode +- **Configure** — VLM, guardrails, query rewriting, ingestion, search & retrieval, models, observability, summarization, multimodal, MCP, evaluation, notebooks, UI, and more +- **Troubleshoot** — Debug unhealthy services, container errors, GPU issues, connectivity failures +- **Shutdown** — Stop, tear down, and clean up services diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..e16f0c9d6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,84 @@ +# NVIDIA RAG Blueprint + +Reference implementation for a Retrieval Augmented Generation pipeline. Python 3.11+ backend (FastAPI + LangChain), React/TypeScript frontend, deployable via Docker Compose or Helm. 
+ +## Project structure + +``` +src/nvidia_rag/ +├── rag_server/ # RAG query/response server (FastAPI) +├── ingestor_server/ # Document ingestion server (FastAPI) +└── utils/ # Shared utilities +frontend/ # React + TypeScript UI (pnpm) +deploy/ +├── compose/ # Docker Compose files and env configs +└── helm/ # Helm charts (standard + MIG-slicing) +docs/ # User-facing documentation (Sphinx, RST/MD) +tests/ +├── unit/ # No network calls allowed +└── integration/ # Network calls permitted +notebooks/ # Jupyter notebooks for evaluation and examples +``` + +## Development commands + +### Backend (Python) + +```bash +uv sync # Install all deps +uv run pytest tests/unit/ # Unit tests +uv run pytest tests/integration/ # Integration tests +ruff check --fix src/ # Lint + autofix +ruff format src/ # Format +pre-commit run --all-files # Run all pre-commit hooks +``` + +### Frontend (TypeScript) + +```bash +cd frontend +pnpm install +pnpm run dev # Dev server +pnpm run lint # ESLint +pnpm exec tsc --noEmit # Type check +pnpm run test:run # Tests +``` + +## Code conventions + +- **Python**: Ruff for linting and formatting (line-length 88, double quotes, space indent). Config in `pyproject.toml`. +- **Type hints**: Required on all function signatures. +- **Imports**: Sorted by isort via Ruff. No in-function imports. +- **Tests**: Mirror source tree (`src/nvidia_rag/rag_server/server.py` → `tests/unit/rag_server/test_server.py`). +- **Frontend**: ESLint + TypeScript strict mode. Function components with hooks. +- **Env files**: `deploy/compose/nvdev.env` (NVIDIA-hosted NIMs) and `deploy/compose/.env` (self-hosted). These are the source of truth for Docker deployments — shell-only exports are lost on restart. + +## Deployment modes + +1. **Docker Compose** — `deploy/compose/` with env-file configs. Multiple profiles: standard, retrieval-only, NVIDIA-hosted. +2. **Helm** — `deploy/helm/nvidia-blueprint-rag/` chart with `values.yaml`. 
Supports MIG GPU slicing via `deploy/helm/mig-slicing/`. +3. **Library** — Import `nvidia_rag` as a Python package for custom pipelines. + +## Key files + +- `pyproject.toml` — All Python deps, ruff config, project metadata +- `deploy/compose/nvdev.env` — Default env file for NVIDIA API Catalog deployments +- `src/nvidia_rag/rag_server/prompt.yaml` — System prompt templates +- `docs/support-matrix.md` — GPU requirements per deployment mode +- `docs/service-port-gpu-reference.md` — Port mappings and GPU assignments + +## PR and commit guidelines + +- Target the `develop` branch, never `main`. +- All commits must be signed off (DCO). +- Run `pre-commit run --all-files` before submitting. +- See `CONTRIBUTING.md` for full workflow. + +## Operations — `/rag-blueprint` skill + +For any operational task, use the `rag-blueprint` skill (`.agents/skills/rag-blueprint/`). + +- **Deploy** — Docker Compose (standard, retrieval-only, NVIDIA-hosted), Helm, MIG-slicing, library mode +- **Configure** — VLM, guardrails, query rewriting, ingestion, search & retrieval, models, observability, summarization, multimodal, MCP, evaluation, notebooks, UI, and more +- **Troubleshoot** — Debug unhealthy services, container errors, GPU issues, connectivity failures +- **Shutdown** — Stop, tear down, and clean up services diff --git a/README.md b/README.md index edea2e72a..c400dd410 100644 --- a/README.md +++ b/README.md @@ -105,9 +105,9 @@ This modular design ensures efficient query processing, accurate retrieval of in - [NVIDIA NIM llama-3_2-nv-embedqa-1b-v2](https://build.nvidia.com/nvidia/llama-3_2-nv-embedqa-1b-v2) - [NVIDIA NIM llama-3_2-nv-rerankqa-1b-v2](https://build.nvidia.com/nvidia/llama-3_2-nv-rerankqa-1b-v2) - - [NeMo Retriever Page Elements NIM](https://build.nvidia.com/nvidia/nemoretriever-page-elements-v3) - - [NeMo Retriever Table Structure NIM](https://build.nvidia.com/nvidia/nemoretriever-table-structure-v1) - - [NeMo Retriever Graphic Elements 
NIM](https://build.nvidia.com/nvidia/nemoretriever-graphic-elements-v1) + - [NeMo Retriever Page Elements NIM](https://build.nvidia.com/nvidia/nemotron-page-elements-v3) + - [NeMo Retriever Table Structure NIM](https://build.nvidia.com/nvidia/nemotron-table-structure-v1) + - [NeMo Retriever Graphic Elements NIM](https://build.nvidia.com/nvidia/nemotron-graphic-elements-v1) - [NeMo Retriever OCR NIM](https://build.nvidia.com/nvidia/nemoretriever-ocr) - Optional NIMs @@ -162,6 +162,29 @@ The following is a step-by-step explanation of the workflow from the end-user pe +## AI Agent Skill + +An agent skill is included that enables AI coding assistants (Claude Code, Cursor, etc.) to deploy, configure, troubleshoot, and manage the RAG Blueprint autonomously. + +### Install + +```bash +npx skills add . +``` + +This installs the `rag-blueprint` skill from `skill-source/`. After installation, the agent handles requests like: + +- *"Deploy RAG on Docker with NVIDIA-hosted models"* +- *"Enable VLM image captioning and restart the ingestor"* +- *"Ingestion failed for 3 files, can you check why?"* +- *"Switch from Docker to library mode"* +- *"Shut down all RAG services"* + +> **Note:** If the agent doesn't pick up the skill automatically (e.g., for short or ambiguous queries), invoke it explicitly with `/rag-blueprint `. + +For skill architecture details, see [`skill-source/README.md`](skill-source/README.md). 
+ + ## Get Started With NVIDIA RAG Blueprint The recommended way to get started is to deploy the NVIDIA RAG Blueprint @@ -202,9 +225,9 @@ Use of the models in this blueprint is governed by the [NVIDIA AI Foundation Mod ## Terms of Use This blueprint is governed by the [NVIDIA Agreements | Enterprise Software | NVIDIA Software License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/) and the [NVIDIA Agreements | Enterprise Software | Product Specific Terms for AI Product](https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/). The models are governed by the [NVIDIA Agreements | Enterprise Software | NVIDIA Community Model License](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/) and the [NVIDIA RAG dataset](./data/multimodal/) which is governed by the [NVIDIA Asset License Agreement](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/data/LICENSE.DATA). -The following models that are built with Llama are governed by the Llama 3.2 Community License Agreement: nvidia/llama-3.2-nv-embedqa-1b-v2 and nvidia/llama-3.2-nv-rerankqa-1b-v2 and llama-3.2-nemoretriever-1b-vlm-embed-v1. +The following models that are built with Llama are governed by the Llama 3.2 Community License Agreement: nvidia/llama-nemotron-embed-1b-v2 and nvidia/llama-nemotron-rerank-1b-v2 and llama-3.2-nemoretriever-1b-vlm-embed-v1. ## Additional Information -The [Llama 3.1 Community License Agreement](https://www.llama.com/llama3_1/license/) for the llama-3.1-nemotron-nano-vl-8b-v1, llama-3.1-nemoguard-8b-content-safety and llama-3.1-nemoguard-8b-topic-control models. The [Llama 3.2 Community License Agreement](https://www.llama.com/llama3_2/license/) for the nvidia/llama-3.2-nv-embedqa-1b-v2, nvidia/llama-3.2-nv-rerankqa-1b-v2 and llama-3.2-nemoretriever-1b-vlm-embed-v1 models. 
The [Llama 3.3 Community License Agreement](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE) for the llama-3.3-nemotron-super-49b-v1.5 models. Built with Llama. Apache 2.0 for NVIDIA Ingest and for the nemoretriever-page-elements-v2, nemoretriever-table-structure-v1, nemoretriever-graphic-elements-v1, paddleocr and nemoretriever-ocr-v1 models. +The [Llama 3.1 Community License Agreement](https://www.llama.com/llama3_1/license/) for the llama-3.1-nemotron-nano-vl-8b-v1, llama-3.1-nemoguard-8b-content-safety and llama-3.1-nemoguard-8b-topic-control models. The [Llama 3.2 Community License Agreement](https://www.llama.com/llama3_2/license/) for the nvidia/llama-nemotron-embed-1b-v2, nvidia/llama-nemotron-rerank-1b-v2 and llama-3.2-nemoretriever-1b-vlm-embed-v1 models. The [Llama 3.3 Community License Agreement](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/LICENSE) for the llama-3.3-nemotron-super-49b-v1.5 models. Built with Llama. Apache 2.0 for NVIDIA Ingest and for the nemoretriever-page-elements-v2, nemotron-table-structure-v1, nemotron-graphic-elements-v1, paddleocr and nemoretriever-ocr-v1 models. 
diff --git a/ci/publish_wheel.sh b/ci/publish_wheel.sh index f59165fee..711abb462 100755 --- a/ci/publish_wheel.sh +++ b/ci/publish_wheel.sh @@ -25,8 +25,8 @@ if [ -n "$ARTIFACTORY_VERSION" ]; then echo "Using custom Artifactory version: $ARTIFACTORY_VERSION" ARTIFACTORY_VERSION_FINAL=$ARTIFACTORY_VERSION else - echo "Using default Artifactory version: 2.4.0.dev" - ARTIFACTORY_VERSION_FINAL="2.4.0.dev" + echo "Using default Artifactory version: 2.5.0.dev" + ARTIFACTORY_VERSION_FINAL="2.5.0.dev" fi # Build first wheel for GitLab Package Registry diff --git a/deploy/compose/.env b/deploy/compose/.env index 9f6ccf796..cc80cfaf0 100644 --- a/deploy/compose/.env +++ b/deploy/compose/.env @@ -22,8 +22,8 @@ export NVIDIA_API_KEY=${NGC_API_KEY} export APP_LLM_SERVERURL=nim-llm:8000 export APP_FILTEREXPRESSIONGENERATOR_SERVERURL=nim-llm:8000 export SUMMARY_LLM_SERVERURL=nim-llm:8000 -export APP_EMBEDDINGS_SERVERURL=nemoretriever-embedding-ms:8000/v1 -export APP_RANKING_SERVERURL=nemoretriever-ranking-ms:8000 +export APP_EMBEDDINGS_SERVERURL=nemotron-embedding-ms:8000/v1 +export APP_RANKING_SERVERURL=nemotron-ranking-ms:8000 export OCR_GRPC_ENDPOINT=nemoretriever-ocr:8001 export OCR_HTTP_ENDPOINT=http://nemoretriever-ocr:8000/v1/infer export OCR_INFER_PROTOCOL=grpc @@ -50,11 +50,11 @@ export YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=grpc # export OCR_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr # export OCR_INFER_PROTOCOL=http # export OCR_MODEL_NAME=scene_text_ensemble -# export YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3 +# export YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3 # export YOLOX_INFER_PROTOCOL=http -# export YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1 +# export YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1 # export 
YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http -# export YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1 +# export YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1 # export YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http # export APP_QUERYREWRITER_SERVERURL="" # export APP_QUERYREWRITER_MODELNAME="nvidia/llama-3.3-nemotron-super-49b-v1.5" diff --git a/deploy/compose/docker-compose-ingestor-server.yaml b/deploy/compose/docker-compose-ingestor-server.yaml index 964a9c02c..1d284d53d 100644 --- a/deploy/compose/docker-compose-ingestor-server.yaml +++ b/deploy/compose/docker-compose-ingestor-server.yaml @@ -3,7 +3,7 @@ services: # Main ingestor server which is responsible for ingestion ingestor-server: container_name: ingestor-server - image: nvcr.io/nvstaging/blueprint/ingestor-server:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/ingestor-server:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../ @@ -75,8 +75,8 @@ services: ##===Embedding Model specific configurations=== # url on which embedding model is hosted. 
If "", Nvidia hosted API is used - APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemoretriever-embedding-ms:8000/v1"} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-3.2-nv-embedqa-1b-v2} + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-embedding-ms:8000/v1"} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-nemotron-embed-1b-v2} # For VLM Embedding Model (Nemoretriever-1b-vlm-embed-v1) # APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-vlm-embedding-ms:8000/v1"} # APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-nemotron-embed-vl-1b-v2} @@ -95,7 +95,8 @@ services: APP_NVINGEST_EXTRACTPAGEASIMAGE: ${APP_NVINGEST_EXTRACTPAGEASIMAGE:-False} APP_NVINGEST_STRUCTURED_ELEMENTS_MODALITY: ${APP_NVINGEST_STRUCTURED_ELEMENTS_MODALITY:-""} # Select from "image", "text_image" APP_NVINGEST_IMAGE_ELEMENTS_MODALITY: ${APP_NVINGEST_IMAGE_ELEMENTS_MODALITY:-""} # Select from "image" - APP_NVINGEST_PDFEXTRACTMETHOD: ${APP_NVINGEST_PDFEXTRACTMETHOD:-None} # Select from pdfium, nemoretron_parse, None + APP_NVINGEST_PDFEXTRACTMETHOD: ${APP_NVINGEST_PDFEXTRACTMETHOD:-None} # Select from pdfium, nemotron_parse, None + APP_NVINGEST_EXTRACTTABLESMETHOD: ${APP_NVINGEST_EXTRACTTABLESMETHOD:-yolox} # yolox, nemotron_parse, or None # Extract text by "page" only recommended for documents with pages like .pdf, .docx, etc. 
APP_NVINGEST_TEXTDEPTH: ${APP_NVINGEST_TEXTDEPTH:-page} # extract by "page" or "document" @@ -168,7 +169,7 @@ services: - "6379:6379" nv-ingest-ms-runtime: - image: nvcr.io/nvidia/nemo-microservices/nv-ingest:26.1.1 + image: nvcr.io/nvidia/nemo-microservices/nv-ingest:26.1.2 # cpuset: "0-15" # Uncomment to restrict this container to CPU cores 0–15 shm_size: 40gb # Should be at minimum 30% of assigned memory per Ray documentation volumes: @@ -234,13 +235,13 @@ services: - YOLOX_HTTP_ENDPOINT=${YOLOX_HTTP_ENDPOINT:-http://page-elements:8000/v1/infer} - YOLOX_INFER_PROTOCOL=${YOLOX_INFER_PROTOCOL:-grpc} # build.nvidia.com hosted yolox-graphics-elements endpoints. - #- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1 + #- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1 #- YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http - YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT=${YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT:-graphic-elements:8001} - YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=${YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT:-http://graphic-elements:8000/v1/infer} - YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=${YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL:-grpc} # build.nvidia.com hosted yolox-table-elements endpoints. 
- #- YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1 + #- YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1 #- YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http - YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT=${YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT:-table-structure:8001} - YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=${YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT:-http://table-structure:8000/v1/infer} diff --git a/deploy/compose/docker-compose-rag-server.yaml b/deploy/compose/docker-compose-rag-server.yaml index b3e20808f..dc04c5329 100644 --- a/deploy/compose/docker-compose-rag-server.yaml +++ b/deploy/compose/docker-compose-rag-server.yaml @@ -3,7 +3,7 @@ services: # Main orchestrator server which stiches together all calls to different services to fulfill the user request rag-server: container_name: rag-server - image: nvcr.io/nvstaging/blueprint/rag-server:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/rag-server:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../ @@ -74,11 +74,11 @@ services: LLM_MAX_TOKENS: ${LLM_MAX_TOKENS:-32768} LLM_TEMPERATURE: ${LLM_TEMPERATURE:-0} LLM_TOP_P: ${LLM_TOP_P:-1.0} - - # Enable/disable thinking/reasoning for nemotron-3-nano models (30b variant) - # Set to "true" to enable reasoning mode with reasoning_budget - # Set to "false" to disable reasoning and get direct answers - ENABLE_NEMOTRON_3_NANO_THINKING: ${ENABLE_NEMOTRON_3_NANO_THINKING:-true} + + # Reasoning configuration (supported by Nemotron 3 and other reasoning models) + LLM_ENABLE_THINKING: ${LLM_ENABLE_THINKING:-false} + LLM_REASONING_BUDGET: ${LLM_REASONING_BUDGET:-0} + LLM_LOW_EFFORT: ${LLM_LOW_EFFORT:-false} ##===Query Rewriter Model specific configurations=== APP_QUERYREWRITER_MODELNAME: ${APP_QUERYREWRITER_MODELNAME:-"nvidia/llama-3.3-nemotron-super-49b-v1.5"} @@ -94,8 +94,8 @@ services: ##===Embedding Model specific configurations=== # url on which embedding 
model is hosted. If "", Nvidia hosted API is used - APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemoretriever-embedding-ms:8000/v1"} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-3.2-nv-embedqa-1b-v2} + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-embedding-ms:8000/v1"} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-nemotron-embed-1b-v2} APP_EMBEDDINGS_DIMENSIONS: ${APP_EMBEDDINGS_DIMENSIONS:-2048} # For VLM Embedding Model (Nemoretriever-1b-vlm-embed-v1) # APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-vlm-embedding-ms:8000/v1"} @@ -103,8 +103,8 @@ services: ##===Reranking Model specific configurations=== # url on which ranking model is hosted. If "", Nvidia hosted API is used - APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL-"nemoretriever-ranking-ms:8000"} - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/llama-3.2-nv-rerankqa-1b-v2"} + APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL-"nemotron-ranking-ms:8000"} + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/llama-nemotron-rerank-1b-v2"} ENABLE_RERANKER: ${ENABLE_RERANKER:-True} # Default score threshold for filtering documents by reranker relevance (0.0 to 1.0) RERANKER_SCORE_THRESHOLD: ${RERANKER_SCORE_THRESHOLD:-${RERANKER_CONFIDENCE_THRESHOLD:-0.0}} @@ -211,7 +211,7 @@ services: # Sample UI container which interacts with APIs exposed by rag-server container rag-frontend: container_name: rag-frontend - image: nvcr.io/nvstaging/blueprint/rag-frontend:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/rag-frontend:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../frontend diff --git a/deploy/compose/nemoguardrails/config-store/nemoguard_cloud/config.yml b/deploy/compose/nemoguardrails/config-store/nemoguard_cloud/config.yml index 1e014fc9f..17200db05 100644 --- a/deploy/compose/nemoguardrails/config-store/nemoguard_cloud/config.yml +++ 
b/deploy/compose/nemoguardrails/config-store/nemoguard_cloud/config.yml @@ -17,5 +17,7 @@ rails: - content safety check input $model=content_safety - topic safety check input $model=topic_control output: + streaming: + enabled: true flows: - content safety check output $model=content_safety \ No newline at end of file diff --git a/deploy/compose/nemotron3-super-cloud.env b/deploy/compose/nemotron3-super-cloud.env new file mode 100644 index 000000000..468bd2fb7 --- /dev/null +++ b/deploy/compose/nemotron3-super-cloud.env @@ -0,0 +1,49 @@ +# ============================================================================== +# Nemotron 3 Super - NVIDIA-hosted (cloud) endpoints +# ============================================================================== +# Self-contained cloud + Nemotron 3 Super. Source after .env so cloud endpoints +# override on-prem defaults: source deploy/compose/.env && source deploy/compose/nemotron3-super-cloud.env +# No need to edit .env (uncomment/comment sections). 
+# ============================================================================== + +# === Authentication === +export NVIDIA_API_KEY=${NGC_API_KEY} + +# === Embeddings, Ranking, OCR, YOLOX (cloud) === +export APP_EMBEDDINGS_SERVERURL=https://integrate.api.nvidia.com/v1 +export APP_RANKING_SERVERURL=https://integrate.api.nvidia.com/v1 +export OCR_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr +export OCR_INFER_PROTOCOL=http +export OCR_MODEL_NAME=scene_text_ensemble +export YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3 +export YOLOX_INFER_PROTOCOL=http +export YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1 +export YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http +export YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1 +export YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http + +# === LLM === +export APP_LLM_MODELNAME=nvidia/nemotron-3-super-120b-a12b +export APP_LLM_SERVERURL=https://integrate.api.nvidia.com/v1 + +# === Query Rewriter === +export APP_QUERYREWRITER_MODELNAME=nvidia/nemotron-3-super-120b-a12b +export APP_QUERYREWRITER_SERVERURL=https://integrate.api.nvidia.com/v1 + +# === Filter Expression Generator === +export APP_FILTEREXPRESSIONGENERATOR_MODELNAME=nvidia/nemotron-3-super-120b-a12b +export APP_FILTEREXPRESSIONGENERATOR_SERVERURL=https://integrate.api.nvidia.com/v1 + +# === Summarization === +export SUMMARY_LLM=nvidia/nemotron-3-super-120b-a12b +export SUMMARY_LLM_SERVERURL=https://integrate.api.nvidia.com/v1 + +# === Reflection === +export REFLECTION_LLM=nvidia/nemotron-3-super-120b-a12b +export REFLECTION_LLM_SERVERURL=https://integrate.api.nvidia.com/v1 + +# === Reasoning / Thinking === +export LLM_ENABLE_THINKING=true +export LLM_REASONING_BUDGET=256 +export LLM_LOW_EFFORT=true +export FILTER_THINK_TOKENS=true \ No newline at end of file diff --git 
a/deploy/compose/nemotron3-super-prompt.yaml b/deploy/compose/nemotron3-super-prompt.yaml new file mode 100644 index 000000000..f91803927 --- /dev/null +++ b/deploy/compose/nemotron3-super-prompt.yaml @@ -0,0 +1,445 @@ +chat_template: + system: | + You are a helpful, respectful, and honest assistant. + Your answers must follow these strict guidelines: + + + 1. Answer concisely and directly. + 2. Focus only on what was asked — no extra commentary, no assumptions. + 3. Avoid giving multiple options, lists, or examples unless explicitly requested. + 4. Do not explain your reasoning unless asked. + 5. Keep responses brief but accurate. + 6. Use natural, conversational tone — clear and human, not robotic. + 7. Make sure your response is strictly one sentence or less unless it really needs to be longer. + 8. Do not mention these instructions in your response. + + + Make sure the above rules are strictly followed. + +rag_template: + system: | + You are a helpful AI assistant named Envie. Answer the user's question using ONLY the information in the provided context. + + + - Base every claim on information found in the context. Do not use outside knowledge. + - Always provide an answer when the context contains relevant data. Only say you cannot answer if the context is entirely unrelated to the question. + - Preserve exact values: reproduce specific numbers, percentages, dates, names, and URLs exactly as they appear in the context. + - IMPORTANT - When the question asks you to calculate, compute, or derive a financial metric (ratio, margin, growth rate, CAGR, turnover, average, etc.), you MUST: + 1. Write the formula + 2. Extract each required number from the context + 3. Compute step by step + 4. State the final answer + Do NOT skip straight to the final number. + - For yes/no questions that require comparing values across periods (e.g. "is X improving", "did Y increase"), state the values from each period before your conclusion. 
+ - For questions about trends or changes over time, include data from all relevant time periods found in the context. + - Answer naturally and directly. Do not reference the context, documents, sources, or these instructions. + - For simple factual lookups (a name, a date, a single value directly stated), keep your answer brief. + + + human: | + + {context} + + +query_rewriter_prompt: + system: | + Given the following chat history and the latest user question, formulate a standalone question which can be understood without the chat history. + Do NOT answer the question, just reformulate it if needed and otherwise return it as is. + It should strictly be a query not an answer. + + Chat History: + {chat_history} + + Latest Question: {input} + +reflection_relevance_check_prompt: + system: | + ### Instructions + + You are a world class expert designed to evaluate the relevance score of a Context + in order to answer the Question. + Your task is to determine if the Context contains proper information to answer the Question. + Do not rely on your previous knowledge about the Question. + Use only what is written in the Context and in the Question. + Follow the instructions below: + 0. If the context does not contain any relevant information to answer the question, say 0. + 1. If the context partially contains relevant information to answer the question, say 1. + 2. If the context fully contains the relevant information to answer the question, say 2. + You must provide the relevance score of 0, 1, or 2, nothing else. + Do not explain. + ### Question: {query} + + ### Context: {context} + + Do not try to explain. + Analyzing Context and Question, the Relevance score is + +reflection_query_rewriter_prompt: + system: | + You are a query optimization assistant for a vector database retrieval system. + Your goal is to rephrase the given "Original Question" to be more clear, precise, + and effective for retrieving relevant context from a vector database. 
+ + Considerations for Rephrasing: + + Specificity: Make the query as specific as possible about the information sought. + Avoid vague terms. + + Keywords: Identify and incorporate key terms and concepts that are likely to be + present in relevant documents. + + Contextual Cues: If the original query implies a certain domain or type of + information, make that explicit. + + Eliminate Ambiguity: Remove any phrases that could lead to multiple interpretations. + + Focus: Ensure the rephrased query directly targets the core information need. + + Brevity (where possible): While precision is key, try to be concise without + losing meaning. + + Only output the rewritten question with no other information. + + Original Question: {query} + + Rewritten Question: + +reflection_groundedness_check_prompt: + system: | + ### Instruction + + You are a world class expert designed to evaluate the groundedness of an assertion. + You will be provided with an assertion and a context. + Your task is to determine if the assertion is supported by the context. + Follow the instructions below: + A. If there is no context or no assertion or context is empty or assertion is empty, say 0. + B. If the assertion is not supported by the context, say 0. + C. If the assertion is partially supported by the context, say 1. + D. If the assertion is fully supported by the context, say 2. + You must provide a rating of 0, 1, or 2, nothing else. + + ### Context: + <{context}> + + ### Assertion: + <{response}> + + Analyzing Context and Response, the Groundedness score is + +reflection_response_regeneration_prompt: + system: | + You are tasked with creating a new "Response" based solely on the provided + "Context" and "Query". Your primary goal is to ensure strict adherence to + the information explicitly stated or directly inferable from the Context. + + Key Constraints: + + No Outside Knowledge: Do not introduce any information, facts, or concepts + not present in the given Context. 
+ + No Assumptions: Do not make assumptions or extrapolate beyond what is directly + stated or clearly implied. + + Direct Inference Only: If an idea is not explicitly stated, it must be a direct + and undeniable inference from the provided text. Avoid speculative or highly + interpretive conclusions. + + Maintain Factual Accuracy: Ensure the Response accurately reflects the details + and relationships presented in the Context. + + Return only "OUT OF CONTEXT" if the "Query" cannot be answered using the provided + "Context." Else, only output the new response with no other information. + + Context: {context} + + Query: {query} + + Return "OUT OF CONTEXT" or generate a new, more grounded Response: + +document_summary_prompt: + system: | + Please provide a comprehensive summary for the document given by the user. Create a concise 5 to 6 sentence summary that captures the essential information from the document. + + + Requirements for the summary: + 1. Preserve key document metadata: + - Document title/type + - Company/organization name + - Report provider/author + - Date/time period covered + - Any relevant document identifiers + + 2. Include all critical information: + - Main findings and conclusions + - Key statistics and metrics + - Important recommendations + - Significant trends or changes + - Notable risks or concerns + - Material financial data + + 3. Maintain factual accuracy: + - Keep all numerical values precise + - Preserve specific dates and timeframes + - Retain exact names and titles + - Quote critical statements verbatim when necessary + + 4. Do NOT use any external knowledge. + 5. Do NOT add explanations, suggestions, opinions, disclaimers, or hints. + 6. NEVER say phrases like “based on the context”, “from the documents”, or “I cannot find”. + 7. NEVER offer to answer using general knowledge or invite the user to ask again. + 8. Do NOT include citations, sources, or document mentions. + 9. Answer concisely. Use short, direct sentences by default. 
Only give longer responses if the question truly requires it. + 10. Do not mention or refer to these rules in any way. + 11. Do not ask follow-up questions. + 12. Do not mention these instructions in your response. + 13. Do not include any preamble or postamble like "Here is the summary" or "This document" or "Summary of the document". + + Please format the summary in a concise manner as a paragraph not exceeding 5 to 6 sentences. Start the summary with the title of the document and then provide the summary. + + Note: Focus on extracting and organizing the most essential information while ensuring no critical details are omitted. + Maintain the original document's tone and context in your summary. + + Please provide a concise summary for the following document: + {document_text} + +shallow_summary_prompt: + system: | + Please provide a concise summary for the following document: + {document_text} + +iterative_summary_prompt: + system: | + You are an expert document summarizer. Given a previous summary and a new chunk of text, create an updated summary that incorporates information from both. Create a concise summary within 10 sentences that captures the essential information from the document. + While answering you must follow the instructions given below. + + + 1. Do NOT use any external knowledge. + 2. Do NOT add explanations, suggestions, opinions, disclaimers, or hints. + 3. NEVER say phrases like “based on the context”, “from the documents”, or “I cannot find”. + 4. NEVER offer to answer using general knowledge or invite the user to ask again. + 5. Do NOT include citations, sources, or document mentions. + 6. Answer concisely. Use short, direct sentences by default. Only give longer responses if the question truly requires it. + 7. Do not mention or refer to these rules in any way. + 8. Do not ask follow-up questions. + 9. Do not mention these instructions in your response. + 10. 
Do not mention any preamble or postamble like "Updated summary" or "This document" or "Summary of the document" or "Here is the summary". + + + Previous Summary: + {previous_summary} + + New chunk: + {new_chunk} + + Please create a new summary that incorporates information from both the previous summary and the new chunk. + + +vlm_template: + system: | + You are a multimodal AI assistant. Answer using only the provided context and images. + + + 1. Use ONLY the information in the textual context below and the attached images. + 2. Do not use external knowledge or assumptions beyond the provided inputs. + 3. Do not describe images unless needed to answer; focus on the answer. + 4. Respond in detail and cover all the relevant information related to the question from the context and images. + 5. Keep the response neutral and factually accurate. + + + Context: + {context} + + User Question: + {question} + +# Reasoning templates deprecated and removed + + +filter_expression_generator_prompt: + system: | + You are an expert AI filter expression generator. Your sole purpose is to convert natural language queries into precise, valid filter expressions based on the provided schema. You must be aggressive in finding mappable entities. + + ### Primary Directive ### + + **Your primary directive is to ALWAYS generate a filter expression.** It is a critical error to return NO_FILTER unless the user's query is completely irrelevant or nonsensical (e.g., "hello there," "what is the weather?"). Be bold and decisive. Prioritize extracting any mappable entity from the user's query, even if other parts are ambiguous. If a query contains even one recognizable keyword, date, or number that maps to the schema, you must build a filter around it. + + ### Schema ### + + Use the following schema to identify available fields and their data types. + {metadata_schema} + + ### Core Logic ### + + 1. 
**Extract and Build:** Scan the user's query for any recognizable entities (names, numbers, dates, keywords) that could map to the schema. Build a filter using every piece of information you can extract. Ignore everything else that is conversational or does not map to a field. + 2. **Field Format:** The field format is always content_metadata["field_name"]. + 3. **Operators:** Use uppercase logical operators: AND, OR, NOT. Use parentheses () to group expressions. + + ### Operators & Data Types (Complete List) ### + + 1. **String**: ==, !=, in, like + * Example: content_metadata["doc_type"] in ["report", "summary"] + 2. **Number**: ==, !=, >, >=, <, <=, in, between + * Example: content_metadata["page_count"] > 10 + 3. **Datetime** (Format: YYYY-MM-DDTHH:MM:SS): ==, !=, >, >=, <, <= + * Example: content_metadata["created_at"] >= "2024-01-01T00:00:00" + 4. **Boolean**: ==, != + * Example: content_metadata["is_public"] == true + 5. **Array**: array_contains, array_contains_any, array_contains_all, array_length + * Single value: array_contains(content_metadata["category"], "AI") + * Multiple values (any): array_contains_any(content_metadata["regions"], ["EMEA", "APAC"]) + * Multiple values (all): array_contains_all(content_metadata["tags"], ["urgent", "review"]) + + ### Intelligent Mapping Examples ### + + * **Query:** "Project X" + * **Action:** Recognizes "Project X" as a single mappable entity and builds a filter. + * **Output:** content_metadata["project"] == "Project X" + * **Query:** "approved" + * **Action:** Recognizes "approved" as a status and builds a filter just for that. + * **Output:** content_metadata["status"] == "approved" + * **Query:** "Find the latest financial reports for Project X" + * **Action:** Ignore "latest" as it's subjective. Extract "financial reports" and "Project X". 
+ * **Output:** (content_metadata["doc_type"] == "financial_report" AND content_metadata["project"] == "Project X") + * **Query:** "I think I need the document from Q2 last year about compliance" + * **Action:** Ignore "I think I need". Extract "Q2 last year" (2024) and "compliance". + * **Output:** (content_metadata["created_at"] >= "2024-04-01T00:00:00" AND content_metadata["created_at"] < "2024-07-01T00:00:00" AND array_contains(content_metadata["tags"], "compliance")) + + ### Your Task ### + + Convert the following user query into a filter expression. + {user_request} + + ### Response Format ### + + Your response **MUST** be only the raw filter expression string and nothing else. Do not use explanations, comments, or markdown. + + 1. **On Success:** The filter expression string. + * content_metadata["year"] == 2024 + + 2. **On Absolute Failure:** The exact text NO_FILTER. + * **Use this ONLY if the query is completely unrelated to the schema**, like "what is your name?" or "tell me a joke". + + 3. **On Logical Conflict:** The exact text UNSUPPORTED. + * **Use this ONLY for impossible logic**, like "year is 2022 and year is 2023". + +query_decomposition_multiquery_prompt: + system: | + You are an AI assistant designed to break down a user's complex question into a list of simpler, focused subqueries. + The purpose of this decomposition is to improve the accuracy of a retrieval-augmented generation (RAG) system. + + + 1. Analyze the user's main question to identify its key components. + 2. Decompose the question into 1-3 distinct, self-contained subqueries. + 3. If the original question is simple and already focused, return query directly. + 4. Each subquery should be a clear, direct question that, when answered, contributes to a comprehensive response to the original question. + 5. Avoid creating redundant or overly broad subqueries. 
Focus on the core information needed to answer the original prompt + + + Return only the subqueries as a numbered list, without any additional text. + Original question: {question} + +query_decompositions_query_rewriter_prompt: + system: | + You are an expert at rewriting queries to improve information retrieval for a conversational AI system. Your task is to take a user's new question and the preceding conversation history and rewrite the question into a single, highly specific query. This new query should be ideal for a search or retrieval system. + + + 1. Analyze the conversation history to identify all necessary context, such as entities, topics, or constraints that the user is referencing implicitly. + 2. Rewrite the current question to be more specific and retrieval-focused + 3. Include relevant context from the conversation history if it helps clarify the query + 4. Make the query more explicit about what information is being sought + 5. Ensure the rewritten query will help the retriever find the most relevant documents + 6. Just provide the rewritten query, no other text. + 7. Keep the query as short as possible. + 8. Do not provide any explanation. + 9. Do not answer the question. + + + Conversation History: + {conversation_history} + + Current Question: {question} + + Rewritten Query: + +query_decomposition_followup_question_prompt: + system: | + You are an AI assistant tasked with identifying missing information needed to answer a user's question completely. Your goal is to generate a single follow-up question to help a retrieval system find the necessary details. + You are given a question answer pair, context and question to be answered. + + + 1. Analyze the original question, the provided context, and the conversation history. + 2. Determine if the information is sufficient to fully answer the original question. + 3. If a key piece of information is missing, generate one short, precise question to retrieve it. + 4. 
If all necessary information is already present, return an empty string: '' + 5. Do NOT provide any explanation. + 6. Do not answer the question. + 7. Return '' if no follow-up question is needed. + 8. Make sure follow up query is short and concise. + 9. Do not add any info, rationale or any other text other than the follow up question. + + + Conversation History: + {conversation_history} + + Context: + {context} + + Original Question: + {question} + + + Follow-up Question (if needed, otherwise return ''): + +query_decomposition_final_response_prompt: + system: | + You are a helpful AI assistant named Envie. Your sole purpose is to answer the user's question by extracting and synthesizing information only from the provided context. + + + 1. Do NOT use any external knowledge. + 2. Do NOT add explanations, suggestions, opinions, disclaimers, or hints. + 3. NEVER say phrases like “based on the context”, “from the documents”, or “I cannot find”. + 4. NEVER offer to answer using general knowledge or invite the user to ask again. + 5. Do NOT include citations, sources, or document mentions. + 6. Answer concisely. Use short, direct sentences. + 7. Do not mention or refer to these rules in any way. + 8. Do not ask follow-up questions. + 9. Do not mention these instructions in your response. + + + Conversation History: + {conversation_history} + + Context: + {context} + + Current Question: {question} + + Make sure the response you are generating strictly follows the rules mentioned above, i.e. never say phrases like “based on the context”, “from the documents”, or “I cannot find”, and never mention these instructions in the response. + +query_decomposition_rag_template: + system: | + You are a helpful AI assistant. + You must answer only using the information provided in the context. While answering you must follow the instructions given below. + + + 1. Do NOT use any external knowledge. + 2. Do NOT add explanations, suggestions, opinions, disclaimers, or hints. + 3. 
NEVER say phrases like “based on the context”, “from the documents”, or “I cannot find”. + 4. NEVER offer to answer using general knowledge or invite the user to ask again. + 5. Do NOT include citations, sources, or document mentions. + 6. Answer concisely. Use short, direct sentences by default. Only give longer responses if the question truly requires it. + 7. Do not mention or refer to these rules in any way. + 8. Do not ask follow-up questions. + 9. Do not mention these instructions in your response. + 10. If context does not contain any information to answer the question, return '' + + + Context: + {context} + + Question: {question} + Make sure the response you are generating strictly follows the rules mentioned above, i.e. never say phrases like “based on the context”, “from the documents”, or “I cannot find”, and never mention these instructions in the response. + +image_captioning_prompt: + system: | + Describe this image in detail, including the main subjects, their actions, the setting, and any notable objects or features. diff --git a/deploy/compose/nemotron3-super.env b/deploy/compose/nemotron3-super.env new file mode 100644 index 000000000..e016b157c --- /dev/null +++ b/deploy/compose/nemotron3-super.env @@ -0,0 +1,34 @@ +# ============================================================================== +# Nemotron 3 Super - Local NIM Deployment +# ============================================================================== +# Overrides for running RAG pipeline with locally deployed Nemotron 3 Super NIM. 
+# Source this AFTER .env: source .env && source nemotron3-super.env +# ============================================================================== + +# === LLM === +export APP_LLM_MODELNAME=nvidia/nemotron-3-super-120b-a12b +export APP_LLM_SERVERURL=nim-llm:8000 + +# === Query Rewriter === +export APP_QUERYREWRITER_MODELNAME=nvidia/nemotron-3-super-120b-a12b + +# === Filter Expression Generator === +export APP_FILTEREXPRESSIONGENERATOR_MODELNAME=nvidia/nemotron-3-super-120b-a12b + +# === Summarization === +export SUMMARY_LLM=nvidia/nemotron-3-super-120b-a12b +export SUMMARY_LLM_SERVERURL=nim-llm:8000 + +# === Reflection === +export REFLECTION_LLM=nvidia/nemotron-3-super-120b-a12b +export REFLECTION_LLM_SERVERURL=nim-llm:8000 + +# === Reasoning / Thinking === +export LLM_ENABLE_THINKING=true +export LLM_REASONING_BUDGET=256 +export LLM_LOW_EFFORT=true +export FILTER_THINK_TOKENS=true + +# === LLM_MAX_TOKENS (for RTX 6000 Pro when using NIM_MAX_MODEL_LEN=32768) === +# Uncomment and set: 16256 +# export LLM_MAX_TOKENS=16256 diff --git a/deploy/compose/nims.yaml b/deploy/compose/nims.yaml index f376d9a64..2bca3dce2 100644 --- a/deploy/compose/nims.yaml +++ b/deploy/compose/nims.yaml @@ -31,9 +31,9 @@ services: retries: 100 profiles: ["", "rag"] - nemoretriever-embedding-ms: - container_name: nemoretriever-embedding-ms - image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1 + nemotron-embedding-ms: + container_name: nemotron-embedding-ms + image: nvcr.io/nim/nvidia/llama-nemotron-embed-1b-v2:1.13.0 volumes: - ${MODEL_DIRECTORY:-./}:/opt/nim/.cache ports: @@ -91,9 +91,9 @@ services: start_period: 10m profiles: ["vlm-embed", "vlm-ingest"] - nemoretriever-ranking-ms: - container_name: nemoretriever-ranking-ms - image: nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:1.8.0 + nemotron-ranking-ms: + container_name: nemotron-ranking-ms + image: nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2:1.10.0 volumes: - ${MODEL_DIRECTORY:-./}:/opt/nim/.cache ports: @@ -108,6 
+108,7 @@ services: interval: 10s timeout: 20s retries: 100 + shm_size: 16GB deploy: resources: reservations: @@ -119,7 +120,7 @@ services: profiles: ["", "rag", "vlm-generation"] page-elements: - image: ${YOLOX_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-page-elements-v3}:${YOLOX_TAG:-1.7.0} + image: ${YOLOX_IMAGE:-nvcr.io/nim/nvidia/nemotron-page-elements-v3}:${YOLOX_TAG:-1.8.0} shm_size: 16gb ports: - "8000:8000" @@ -157,7 +158,7 @@ services: profiles: ["", "ingest", "vlm-ingest"] graphic-elements: - image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.6.0} + image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nim/nvidia/nemotron-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.8.0} shm_size: 16gb ports: - "8003:8000" @@ -183,7 +184,7 @@ services: profiles: ["", "ingest", "vlm-ingest"] table-structure: - image: ${YOLOX_TABLE_STRUCTURE_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-table-structure-v1}:${YOLOX_TABLE_STRUCTURE_TAG:-1.6.0} + image: ${YOLOX_TABLE_STRUCTURE_IMAGE:-nvcr.io/nim/nvidia/nemotron-table-structure-v1}:${YOLOX_TABLE_STRUCTURE_TAG:-1.8.0} shm_size: 16gb ports: - "8006:8000" @@ -323,6 +324,7 @@ services: interval: 10s timeout: 20s retries: 100 + shm_size: 16GB deploy: resources: reservations: diff --git a/deploy/compose/nvdev.env b/deploy/compose/nvdev.env index b92e4500b..d5a919153 100644 --- a/deploy/compose/nvdev.env +++ b/deploy/compose/nvdev.env @@ -20,24 +20,24 @@ export APP_LLM_MODELNAME=nvidia/llama-3.3-nemotron-super-49b-v1.5 # export APP_LLM_MODELNAME=nvidia/nemotron-3-nano-30b-a3b # Note: For locally deployed nemotron-3-nano, use: nvidia/nemotron-3-nano export APP_FILTEREXPRESSIONGENERATOR_MODELNAME=nvidia/llama-3.3-nemotron-super-49b-v1.5 -export APP_EMBEDDINGS_MODELNAME=nvdev/nvidia/llama-3.2-nv-embedqa-1b-v2 +export APP_EMBEDDINGS_MODELNAME=nvidia/llama-nemotron-embed-1b-v2 # For VLM Embedding Model (Nemoretriever-1b-vlm-embed-v1) # export 
APP_EMBEDDINGS_MODELNAME=nvdev/nvidia/llama-nemotron-embed-vl-1b-v2 -export APP_RANKING_MODELNAME=nvidia/llama-3.2-nv-rerankqa-1b-v2 +export APP_RANKING_MODELNAME=nvidia/llama-nemotron-rerank-1b-v2 export ENABLE_RERANKER=True export APP_EMBEDDINGS_SERVERURL=https://integrate.api.nvidia.com/v1 export APP_LLM_SERVERURL="" export APP_FILTEREXPRESSIONGENERATOR_SERVERURL="" export APP_RANKING_SERVERURL="" -# export APP_RANKING_SERVERURL=https://ai.api.nvidia.com/v1/nvdev/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking/v1 +# export APP_RANKING_SERVERURL=https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-nemotron-rerank-1b-v2/reranking export OCR_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr export OCR_INFER_PROTOCOL=http export OCR_MODEL_NAME=scene_text_ensemble -export YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3 +export YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3 export YOLOX_INFER_PROTOCOL=http -export YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvdev/nvidia/nemoretriever-graphic-elements-v1 +export YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1 export YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http -export YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvdev/nvidia/nemoretriever-table-structure-v1 +export YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1 export YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http export SUMMARY_LLM="nvidia/llama-3.3-nemotron-super-49b-v1.5" export SUMMARY_LLM_SERVERURL="" diff --git a/deploy/helm/mig-slicing/mig-config.yaml b/deploy/helm/mig-slicing/mig-config-h100.yaml similarity index 100% rename from deploy/helm/mig-slicing/mig-config.yaml rename to deploy/helm/mig-slicing/mig-config-h100.yaml diff --git a/deploy/helm/mig-slicing/mig-config-rtx6000.yaml 
b/deploy/helm/mig-slicing/mig-config-rtx6000.yaml new file mode 100644 index 000000000..14272b497 --- /dev/null +++ b/deploy/helm/mig-slicing/mig-config-rtx6000.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: custom-mig-config +data: + config.yaml: | + version: v1 + mig-configs: + all-disabled: + - devices: all + mig-enabled: false + + custom-rtx6000-4x1g24-2x1g24-1x2g48-1x4g96: + - devices: [0] + mig-enabled: true + mig-devices: + "1g.24gb": 4 + - devices: [1] + mig-enabled: true + mig-devices: + "1g.24gb": 2 + "2g.48gb": 1 + - devices: [2] + mig-enabled: true + mig-devices: + "4g.96gb": 1 diff --git a/deploy/helm/mig-slicing/values-mig.yaml b/deploy/helm/mig-slicing/values-mig-h100.yaml similarity index 100% rename from deploy/helm/mig-slicing/values-mig.yaml rename to deploy/helm/mig-slicing/values-mig-h100.yaml diff --git a/deploy/helm/mig-slicing/values-mig-rtx6000.yaml b/deploy/helm/mig-slicing/values-mig-rtx6000.yaml new file mode 100644 index 000000000..e7ae285da --- /dev/null +++ b/deploy/helm/mig-slicing/values-mig-rtx6000.yaml @@ -0,0 +1,114 @@ +# MIG-optimized resource configuration for RAG Blueprint +# This file only overrides GPU resource requirements to use MIG slices + +# NV-Ingest configuration +nv-ingest: + # Milvus - uses 1g.24gb MIG slice + milvus: + standalone: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + + # NV-Ingest NIM Operator overrides + nimOperator: + # Page Elements - uses 1g.24gb + page_elements: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + storage: + pvc: + storageClass: "" + + # Graphic Elements - uses 1g.24gb + graphic_elements: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + storage: + pvc: + storageClass: "" + + # Table Structure - uses 
1g.24gb + table_structure: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + storage: + pvc: + storageClass: "" + + # OCR - uses 2g.48gb (larger slice) + nemoretriever_ocr_v1: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-2g.48gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-2g.48gb: 1 + storage: + pvc: + storageClass: "" +# Main NIM Operator overrides for MIG +nimOperator: + # LLM - uses 4g.96gb + nim-llm: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-4g.96gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-4g.96gb: 1 + storage: + pvc: + storageClass: "" + model: + engine: tensorrt_llm + precision: "fp8" + qosProfile: "throughput" + tensorParallelism: "1" + gpus: + - product: "rtx6000_blackwell_sv" + # Embedding - uses 1g.24gb + nvidia-nim-llama-32-nv-embedqa-1b-v2: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + storage: + pvc: + storageClass: "" + # Reranking - uses 1g.24gb + nvidia-nim-llama-32-nv-rerankqa-1b-v2: + resources: + limits: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + requests: + nvidia.com/gpu: "0" + nvidia.com/mig-1g.24gb: 1 + storage: + pvc: + storageClass: "" diff --git a/deploy/helm/nvidia-blueprint-rag/Chart.lock b/deploy/helm/nvidia-blueprint-rag/Chart.lock index 7b479e4aa..723660bfd 100644 --- a/deploy/helm/nvidia-blueprint-rag/Chart.lock +++ b/deploy/helm/nvidia-blueprint-rag/Chart.lock @@ -1,7 +1,7 @@ dependencies: - name: nv-ingest repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 26.1.1 + version: 26.1.2 - name: eck-elasticsearch repository: https://helm.elastic.co version: 0.18.0 @@ -14,5 +14,5 @@ dependencies: - name: kube-prometheus-stack repository: https://prometheus-community.github.io/helm-charts version: 76.3.0 -digest: 
sha256:7f85073bdf19922173b3372d9b5a877d6c2f783b431ce7a2f783308f67806c66 -generated: "2026-02-04T07:29:44.453434343Z" +digest: sha256:a65037bbcb6fa587af3d15b949a32b059cf26d1102a2166d0e77daed29a0f520 +generated: "2026-03-02T16:48:31.702049307+05:30" diff --git a/deploy/helm/nvidia-blueprint-rag/Chart.yaml b/deploy/helm/nvidia-blueprint-rag/Chart.yaml index a8a459279..afe7a5cbd 100644 --- a/deploy/helm/nvidia-blueprint-rag/Chart.yaml +++ b/deploy/helm/nvidia-blueprint-rag/Chart.yaml @@ -1,10 +1,10 @@ apiVersion: v2 -appVersion: v2.4.0 +appVersion: v2.5.0 dependencies: - condition: nv-ingest.enabled name: nv-ingest repository: https://helm.ngc.nvidia.com/nvidia/nemo-microservices - version: 26.1.1 + version: 26.1.2 - condition: eck-elasticsearch.enabled name: eck-elasticsearch repository: https://helm.elastic.co @@ -24,4 +24,4 @@ dependencies: description: An end to end Helm chart for the NVIDIA RAG Blueprint name: nvidia-blueprint-rag type: application -version: v2.4.0 +version: v2.5.0 diff --git a/deploy/helm/nvidia-blueprint-rag/endpoints.md b/deploy/helm/nvidia-blueprint-rag/endpoints.md index 7609053d5..e62b0d1fb 100644 --- a/deploy/helm/nvidia-blueprint-rag/endpoints.md +++ b/deploy/helm/nvidia-blueprint-rag/endpoints.md @@ -24,11 +24,11 @@ This document describes the configurable endpoints used by the RAG server and it ### Embedding Model - **APP_EMBEDDINGS_SERVERURL**: URL for the embedding model service (default: "nemo-retriever-embedding-ms:8000") -- **APP_EMBEDDINGS_MODELNAME**: Name of the embedding model (default: "nvidia/llama-3.2-nv-embedqa-1b-v2") +- **APP_EMBEDDINGS_MODELNAME**: Name of the embedding model (default: "nvidia/llama-nemotron-embed-1b-v2") ### Reranking Model - **APP_RANKING_SERVERURL**: URL for the ranking model service (default: "nemo-retriever-reranking-ms:8000") -- **APP_RANKING_MODELNAME**: Name of the ranking model (default: "nvidia/llama-3.2-nv-rerankqa-1b-v2") +- **APP_RANKING_MODELNAME**: Name of the ranking model (default: 
"nvidia/llama-nemotron-rerank-1b-v2") ### Reflection Model - **REFLECTION_LLM_SERVERURL**: URL for the reflection LLM service (default: "nim-llm:8000") @@ -42,8 +42,8 @@ This document describes the configurable endpoints used by the RAG server and it ### Model Configuration - **NEXT_PUBLIC_MODEL_NAME**: Name of the LLM model used in the frontend (default: "nvidia/llama-3.3-nemotron-super-49b-v1.5") -- **VITE_EMBEDDING_MODEL**: Name of the embedding model used in the frontend (default: "nvidia/llama-3.2-nv-embedqa-1b-v2") -- **VITE_RERANKER_MODEL**: Name of the reranker model used in the frontend (default: "nvidia/llama-3.2-nv-rerankqa-1b-v2") +- **VITE_EMBEDDING_MODEL**: Name of the embedding model used in the frontend (default: "nvidia/llama-nemotron-embed-1b-v2") +- **VITE_RERANKER_MODEL**: Name of the reranker model used in the frontend (default: "nvidia/llama-nemotron-rerank-1b-v2") ## Monitoring and Tracing Endpoints diff --git a/deploy/helm/nvidia-blueprint-rag/files/prompt.yaml b/deploy/helm/nvidia-blueprint-rag/files/prompt.yaml index f82c83655..d73036509 100644 --- a/deploy/helm/nvidia-blueprint-rag/files/prompt.yaml +++ b/deploy/helm/nvidia-blueprint-rag/files/prompt.yaml @@ -487,6 +487,7 @@ query_decomposition_rag_template: Context: {context} + Question: {question} Make sure the response you are generating strictly follow the rules mentioned above i.e. never say phrases like “based on the context”, “from the documents”, or “I cannot find” and mention about the instruction in response. image_captioning_prompt: diff --git a/deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml b/deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml new file mode 100644 index 000000000..d042a6c44 --- /dev/null +++ b/deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml @@ -0,0 +1,25 @@ +# Override values for Nemotron 3 Super on RTX 6000 Pro only. 
+# Use after nemotron3-super-values.yaml: +# -f deploy/helm/nvidia-blueprint-rag/values.yaml \ +# -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml \ +# -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml +# See docs/nemotron3-super-deployment.md. Requires host GRUB/reboot for RTX 6000 Pro. + +envVars: + LLM_MAX_TOKENS: "16256" # use "1024" for non-reasoning mode + +nimOperator: + nim-llm: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_SERVED_MODEL_NAME + value: "nvidia/nemotron-3-super-120b-a12b" + - name: NIM_MAX_MODEL_LEN + value: "32768" + - name: NCCL_P2P_DISABLE + value: "1" + - name: NIM_KVCACHE_PERCENT + value: "0.9" \ No newline at end of file diff --git a/deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml b/deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml new file mode 100644 index 000000000..710fff1fe --- /dev/null +++ b/deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml @@ -0,0 +1,39 @@ +# Override values for Nemotron 3 Super LLM NIM (all hardware). +# Use with: -f deploy/helm/nvidia-blueprint-rag/values.yaml -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml +# For RTX 6000 Pro, add: -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml +# See docs/nemotron3-super-deployment.md. 
+ +envVars: + APP_LLM_MODELNAME: "nvidia/nemotron-3-super-120b-a12b" + APP_QUERYREWRITER_MODELNAME: "nvidia/nemotron-3-super-120b-a12b" + APP_FILTEREXPRESSIONGENERATOR_MODELNAME: "nvidia/nemotron-3-super-120b-a12b" + REFLECTION_LLM: "nvidia/nemotron-3-super-120b-a12b" + +ingestor-server: + envVars: + SUMMARY_LLM: "nvidia/nemotron-3-super-120b-a12b" + +nimOperator: + nim-llm: + image: + repository: nvcr.io/nim/nvidia/nemotron-3-super-120b-a12b + pullPolicy: IfNotPresent + tag: "1.8.0" + resources: + limits: + nvidia.com/gpu: 2 + requests: + nvidia.com/gpu: 2 + model: + engine: vllm + precision: "fp8" + tensorParallelism: "2" + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_SERVED_MODEL_NAME + value: "nvidia/nemotron-3-super-120b-a12b" + - name: NIM_MAX_MODEL_LEN + value: "131072" diff --git a/deploy/helm/nvidia-blueprint-rag/templates/llm-nim.yaml b/deploy/helm/nvidia-blueprint-rag/templates/llm-nim.yaml index 60f043973..f103a1a72 100644 --- a/deploy/helm/nvidia-blueprint-rag/templates/llm-nim.yaml +++ b/deploy/helm/nvidia-blueprint-rag/templates/llm-nim.yaml @@ -61,4 +61,8 @@ spec: {{- end }} expose: {{ toYaml $nimLlm.expose | nindent 4 }} + {{- with $nimLlm.startupProbe }} + startupProbe: +{{ toYaml . | nindent 4 }} + {{- end }} {{- end }} \ No newline at end of file diff --git a/deploy/helm/nvidia-blueprint-rag/values.yaml b/deploy/helm/nvidia-blueprint-rag/values.yaml index 47ef09b68..00e6914b0 100644 --- a/deploy/helm/nvidia-blueprint-rag/values.yaml +++ b/deploy/helm/nvidia-blueprint-rag/values.yaml @@ -56,8 +56,8 @@ apiKeysSecret: # -- RAG server container image image: - repository: nvcr.io/nvstaging/blueprint/rag-server - tag: "2.4.0" + repository: nvcr.io/nvidia/blueprint/rag-server + tag: "2.5.0" pullPolicy: Always # -- RAG server service configuration @@ -160,15 +160,11 @@ envVars: # URL on which LLM model is hosted. 
If "", Nvidia hosted API is used APP_LLM_SERVERURL: "nim-llm:8000" # LLM model parameters - LLM_MAX_TOKENS: "32768" + # For Nemotron 3 Super on RTX 6000 Pro: set LLM_MAX_TOKENS below to "16256" (reasoning) or "1024" (non-reasoning) + LLM_MAX_TOKENS: "32768" # "16256" LLM_TEMPERATURE: "0" LLM_TOP_P: "1.0" - # Enable/disable thinking/reasoning for nemotron-3-nano models (30b variant) - # Set to "true" to enable reasoning mode with reasoning_budget - # Set to "false" to disable reasoning and get direct answers - ENABLE_NEMOTRON_3_NANO_THINKING: "true" - ##===Query Rewriter Model specific configurations=== APP_QUERYREWRITER_MODELNAME: "nvidia/llama-3.3-nemotron-super-49b-v1.5" # URL on which query rewriter model is hosted. If "", Nvidia hosted API is used @@ -183,14 +179,14 @@ envVars: ##===Embedding Model specific configurations=== # URL on which embedding model is hosted. If "", Nvidia hosted API is used - APP_EMBEDDINGS_SERVERURL: "nemoretriever-embedding-ms:8000/v1" - APP_EMBEDDINGS_MODELNAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + APP_EMBEDDINGS_SERVERURL: "nemotron-embedding-ms:8000/v1" + APP_EMBEDDINGS_MODELNAME: "nvidia/llama-nemotron-embed-1b-v2" APP_EMBEDDINGS_DIMENSIONS: "2048" ##===Reranking Model specific configurations=== # URL on which ranking model is hosted.
If "", Nvidia hosted API is used - APP_RANKING_SERVERURL: "nemoretriever-ranking-ms:8000" - APP_RANKING_MODELNAME: "nvidia/llama-3.2-nv-rerankqa-1b-v2" + APP_RANKING_SERVERURL: "nemotron-ranking-ms:8000" + APP_RANKING_MODELNAME: "nvidia/llama-nemotron-rerank-1b-v2" ENABLE_RERANKER: "True" # Default score threshold for filtering documents by reranker relevance (0.0 to 1.0) RERANKER_SCORE_THRESHOLD: "0.0" @@ -260,6 +256,11 @@ envVars: # Whether to filter content within tags in model responses FILTER_THINK_TOKENS: "true" + # Reasoning configuration (supported by Nemotron 3 and other reasoning models) + LLM_ENABLE_THINKING: "false" + LLM_REASONING_BUDGET: "0" + LLM_LOW_EFFORT: "false" + NEMO_GUARDRAILS_URL: "nemo-guardrails:7331" # enable iterative query decomposition @@ -289,8 +290,8 @@ ingestor-server: password: "" image: - repository: nvcr.io/nvstaging/blueprint/ingestor-server - tag: "2.4.0" + repository: nvcr.io/nvidia/blueprint/ingestor-server + tag: "2.5.0" pullPolicy: Always # -- Service config for ingestor-server @@ -349,8 +350,8 @@ ingestor-server: ## APP_EMBEDDINGS_APIKEY and SUMMARY_LLM_APIKEY are loaded from secrets automatically. 
# === Embeddings Configurations === - APP_EMBEDDINGS_SERVERURL: "nemoretriever-embedding-ms:8000/v1" - APP_EMBEDDINGS_MODELNAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + APP_EMBEDDINGS_SERVERURL: "nemotron-embedding-ms:8000/v1" + APP_EMBEDDINGS_MODELNAME: "nvidia/llama-nemotron-embed-1b-v2" APP_EMBEDDINGS_DIMENSIONS: "2048" # === NV-Ingest Configurations === @@ -359,6 +360,7 @@ ingestor-server: # === NV-Ingest extraction configurations === APP_NVINGEST_PDFEXTRACTMETHOD: "None" # Method used for text extraction from "None", "pdfium", "nemotron_parse" + APP_NVINGEST_EXTRACTTABLESMETHOD: "yolox" # Method for table extraction: "yolox", "nemotron_parse", or None APP_NVINGEST_EXTRACTTEXT: "True" # Enable text extraction APP_NVINGEST_EXTRACTINFOGRAPHICS: "False" # Enable infographic extraction APP_NVINGEST_EXTRACTTABLES: "True" # Enable table extraction @@ -452,9 +454,9 @@ frontend: replicaCount: 1 image: - repository: nvcr.io/nvstaging/blueprint/rag-frontend + repository: nvcr.io/nvidia/blueprint/rag-frontend pullPolicy: IfNotPresent - tag: "2.4.0" + tag: "2.5.0" imagePullSecret: name: "ngc-secret" @@ -657,11 +659,22 @@ nimOperator: repository: nvcr.io/nim/nvidia/llama-3.3-nemotron-super-49b-v1.5 pullPolicy: IfNotPresent tag: "1.14.0" +# -- For Nemotron 3 Super: uncomment the block below and comment the image block above +# image: +# repository: nvcr.io/nim/nvidia/nemotron-3-super-120b-a12b +# pullPolicy: IfNotPresent +# tag: "1.8.0" resources: limits: nvidia.com/gpu: 1 requests: nvidia.com/gpu: 1 +# -- For Nemotron 3 Super (all hardware): uncomment the block below and comment the resources block above +# resources: +# limits: +# nvidia.com/gpu: 2 +# requests: +# nvidia.com/gpu: 2 nodeSelector: {} tolerations: [] model: @@ -672,6 +685,10 @@ nimOperator: # tensorParallelism: "1" # gpus: # - product: "rtx6000_blackwell_sv" +# -- For Nemotron 3 Super (all hardware): comment "engine: tensorrt_llm" above and uncomment the three lines below +# engine: vllm +# precision: "fp8" +# 
tensorParallelism: "2" storage: pvc: create: true @@ -702,6 +719,15 @@ nimOperator: value: "1" - name: NIM_SERVED_MODEL_NAME value: "nvidia/llama-3.3-nemotron-super-49b-v1.5" + - name: NIM_MAX_MODEL_LEN + value: "131072" +# -- For Nemotron 3 Super on RTX 6000 Pro: comment the NIM_MAX_MODEL_LEN entry above and uncomment the block below +# - name: NIM_MAX_MODEL_LEN +# value: "32768" +# - name: NCCL_P2P_DISABLE +# value: "1" +# - name: NIM_KVCACHE_PERCENT +# value: "0.9" # - name: CUDA_VISIBLE_DEVICES # value: "0" expose: @@ -710,16 +736,27 @@ nimOperator: type: ClusterIP port: 8000 grpcPort: 8001 + startupProbe: + enabled: true + probe: + httpGet: + path: /v1/health/ready + port: 8000 + initialDelaySeconds: 60 + periodSeconds: 10 + failureThreshold: 750 + timeoutSeconds: 5 + # subsection: nvidia-nim-llama-32-nv-embedqa-1b-v2 # NIM Text Embedding nvidia-nim-llama-32-nv-embedqa-1b-v2: enabled: true replicas: 1 service: - name: "nemoretriever-embedding-ms" + name: "nemotron-embedding-ms" image: - repository: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2 - tag: "1.10.1" + repository: nvcr.io/nim/nvidia/llama-nemotron-embed-1b-v2 + tag: "1.13.0" pullPolicy: IfNotPresent resources: limits: @@ -795,10 +832,10 @@ nimOperator: enabled: true replicas: 1 service: - name: "nemoretriever-ranking-ms" + name: "nemotron-ranking-ms" image: - repository: nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2 - tag: "1.8.0" + repository: nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2 + tag: "1.10.0" pullPolicy: IfNotPresent resources: limits: @@ -870,7 +907,7 @@ nv-ingest: create: false image: repository: "nvcr.io/nvidia/nemo-microservices/nv-ingest" - tag: "26.1.1" + tag: "26.1.2" resources: limits: nvidia.com/gpu: 0 @@ -896,8 +933,8 @@ nv-ingest: RAY_num_server_call_thread: "1" RAY_worker_num_grpc_internal_threads: "1" - EMBEDDING_NIM_ENDPOINT: "http://nemoretriever-embedding-ms:8000/v1" - EMBEDDING_NIM_MODEL_NAME: "nvidia/llama-3.2-nv-embedqa-1b-v2" + EMBEDDING_NIM_ENDPOINT: 
"http://nemotron-embedding-ms:8000/v1" + EMBEDDING_NIM_MODEL_NAME: "nvidia/llama-nemotron-embed-1b-v2" MESSAGE_CLIENT_HOST: "rag-redis-master" MESSAGE_CLIENT_PORT: 6379 MESSAGE_CLIENT_TYPE: "redis" @@ -1015,7 +1052,7 @@ nv-ingest: replicaCount: 1 image: repository: nvcr.io/nim/nvidia/nemoretriever-ocr-v1 - tag: "1.2.0" + tag: "1.2.1" imagePullSecrets: - name: ngc-secret env: @@ -1049,8 +1086,8 @@ nv-ingest: tolerations: [] replicaCount: 1 image: - repository: nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1 - tag: "1.6.0" + repository: nvcr.io/nim/nvidia/nemotron-graphic-elements-v1 + tag: "1.8.0" env: - name: NIM_HTTP_API_PORT value: "8000" @@ -1082,8 +1119,8 @@ nv-ingest: tolerations: [] replicaCount: 1 image: - repository: nvcr.io/nim/nvidia/nemoretriever-page-elements-v3 - tag: "1.7.0" + repository: nvcr.io/nim/nvidia/nemotron-page-elements-v3 + tag: "1.8.0" env: - name: NIM_HTTP_API_PORT value: "8000" @@ -1133,8 +1170,8 @@ nv-ingest: tolerations: [] replicaCount: 1 image: - repository: nvcr.io/nim/nvidia/nemoretriever-table-structure-v1 - tag: "1.6.0" + repository: nvcr.io/nim/nvidia/nemotron-table-structure-v1 + tag: "1.8.0" env: - name: NIM_HTTP_API_PORT value: "8000" diff --git a/deploy/workbench/README.md b/deploy/workbench/README.md index 6d02a360e..179c32ec5 100644 --- a/deploy/workbench/README.md +++ b/deploy/workbench/README.md @@ -75,4 +75,4 @@ Use of the models in this blueprint is governed by the [NVIDIA AI Foundation Mod ## Terms of Use This blueprint is governed by the [NVIDIA Agreements | Enterprise Software | NVIDIA Software License Agreement](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-software-license-agreement/) and the [NVIDIA Agreements | Enterprise Software | Product Specific Terms for AI Product](https://www.nvidia.com/en-us/agreements/enterprise-software/product-specific-terms-for-ai-products/). 
The models are governed by the [NVIDIA Agreements | Enterprise Software | NVIDIA Community Model License](https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-community-models-license/) and the [NVIDIA RAG dataset](https://github.com/NVIDIA-AI-Blueprints/rag/tree/v2.0.0/data/multimodal) which is governed by the [NVIDIA Asset License Agreement](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/data/LICENSE.DATA). -The following models that are built with Llama are governed by the [Llama 3.2 Community License Agreement](https://www.llama.com/llama3_2/license/): nvidia/llama-3.3-nemotron-super-49b-v1, nvidia/llama-3.2-nv-embedqa-1b-v2, and nvidia/llama-3.2-nv-rerankqa-1b-v2. +The following models that are built with Llama are governed by the [Llama 3.2 Community License Agreement](https://www.llama.com/llama3_2/license/): nvidia/llama-3.3-nemotron-super-49b-v1, nvidia/llama-nemotron-embed-1b-v2, and nvidia/llama-nemotron-rerank-1b-v2. diff --git a/deploy/workbench/compose.yaml b/deploy/workbench/compose.yaml index 04cfdd2e2..91d4b3d28 100644 --- a/deploy/workbench/compose.yaml +++ b/deploy/workbench/compose.yaml @@ -28,9 +28,9 @@ services: retries: 100 profiles: ["local"] - nemoretriever-embedding-ms: - container_name: nemoretriever-embedding-ms - image: nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2:1.10.1 + nemotron-embedding-ms: + container_name: nemotron-embedding-ms + image: nvcr.io/nim/nvidia/llama-nemotron-embed-1b-v2:1.13.0 volumes: - ${MODEL_DIRECTORY:-/tmp}:/opt/nim/.cache ports: @@ -58,9 +58,9 @@ services: start_period: 10m profiles: ["local"] - nemoretriever-ranking-ms: - container_name: nemoretriever-ranking-ms - image: nvcr.io/nim/nvidia/llama-3.2-nv-rerankqa-1b-v2:1.8.0 + nemotron-ranking-ms: + container_name: nemotron-ranking-ms + image: nvcr.io/nim/nvidia/llama-nemotron-rerank-1b-v2:1.10.0 volumes: - ${MODEL_DIRECTORY:-/tmp}:/opt/nim/.cache ports: @@ -86,7 +86,7 @@ services: profiles: ["local"] page-elements: - image: 
${YOLOX_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-page-elements-v3}:${YOLOX_TAG:-1.7.0} + image: ${YOLOX_IMAGE:-nvcr.io/nim/nvidia/nemotron-page-elements-v3}:${YOLOX_TAG:-1.8.0} ports: - "8000:8000" - "8001:8001" @@ -122,7 +122,7 @@ services: profiles: ["local"] graphic-elements: - image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.6.0} + image: ${YOLOX_GRAPHIC_ELEMENTS_IMAGE:-nvcr.io/nim/nvidia/nemotron-graphic-elements-v1}:${YOLOX_GRAPHIC_ELEMENTS_TAG:-1.8.0} ports: - "8003:8000" - "8004:8001" @@ -147,7 +147,7 @@ services: profiles: ["local"] table-structure: - image: ${YOLOX_TABLE_STRUCTURE_IMAGE:-nvcr.io/nim/nvidia/nemoretriever-table-structure-v1}:${YOLOX_TABLE_STRUCTURE_TAG:-1.6.0} + image: ${YOLOX_TABLE_STRUCTURE_IMAGE:-nvcr.io/nim/nvidia/nemotron-table-structure-v1}:${YOLOX_TABLE_STRUCTURE_TAG:-1.8.0} ports: - "8006:8000" - "8007:8001" @@ -200,7 +200,7 @@ services: # Main ingestor server which is responsible for ingestion ingestor-server: container_name: ingestor-server - image: nvcr.io/nvstaging/blueprint/ingestor-server:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/ingestor-server:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../ @@ -256,8 +256,8 @@ services: ##===Embedding Model specific configurations=== # url on which embedding model is hosted. 
If "", Nvidia hosted API is used - APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemoretriever-embedding-ms:8000/v1"} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-3.2-nv-embedqa-1b-v2} + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-embedding-ms:8000/v1"} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-nemotron-embed-1b-v2} APP_EMBEDDINGS_DIMENSIONS: ${APP_EMBEDDINGS_DIMENSIONS:-2048} ##===NV-Ingest Connection Configurations======= @@ -333,7 +333,7 @@ services: profiles: ["ingest"] nv-ingest-ms-runtime: - image: nvcr.io/nvidia/nemo-microservices/nv-ingest:26.1.1 + image: nvcr.io/nvidia/nemo-microservices/nv-ingest:26.1.2 # cpuset: "0-15" # Uncomment to restrict this container to CPU cores 0–15 shm_size: 40gb # Should be at minimum 30% of assigned memory per Ray documentation volumes: @@ -399,20 +399,20 @@ services: - REDIS_MORPHEUS_TASK_QUEUE=morpheus_task_queue # Self-hosted redis endpoints. # build.nvidia.com hosted yolox endpoints. - # - YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3 + # - YOLOX_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3 # - YOLOX_INFER_PROTOCOL=http - YOLOX_PAGE_IMAGE_FORMAT=JPEG - YOLOX_GRPC_ENDPOINT=${YOLOX_GRPC_ENDPOINT:-page-elements:8001} - YOLOX_HTTP_ENDPOINT=${YOLOX_HTTP_ENDPOINT:-http://page-elements:8000/v1/infer} - YOLOX_INFER_PROTOCOL=${YOLOX_INFER_PROTOCOL:-grpc} # build.nvidia.com hosted yolox-graphics-elements endpoints. 
- #- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1 + #- YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1 #- YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=http - YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT=${YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT:-graphic-elements:8001} - YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT=${YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT:-http://graphic-elements:8000/v1/infer} - YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=${YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL:-grpc} # build.nvidia.com hosted yolox-table-elements endpoints. - #- YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1 + #- YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1 #- YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=http - YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT=${YOLOX_TABLE_STRUCTURE_GRPC_ENDPOINT:-table-structure:8001} - YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT=${YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT:-http://table-structure:8000/v1/infer} @@ -432,7 +432,7 @@ services: # Main orchestrator server which stiches together all calls to different services to fulfill the user request rag-server: container_name: rag-server - image: nvcr.io/nvstaging/blueprint/rag-server:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/rag-server:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../ @@ -495,13 +495,13 @@ services: ##===Embedding Model specific configurations=== # url on which embedding model is hosted. 
If "", Nvidia hosted API is used - APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemoretriever-embedding-ms:8000/v1"} - APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-3.2-nv-embedqa-1b-v2} + APP_EMBEDDINGS_SERVERURL: ${APP_EMBEDDINGS_SERVERURL-"nemotron-embedding-ms:8000/v1"} + APP_EMBEDDINGS_MODELNAME: ${APP_EMBEDDINGS_MODELNAME:-nvidia/llama-nemotron-embed-1b-v2} ##===Reranking Model specific configurations=== # url on which ranking model is hosted. If "", Nvidia hosted API is used - APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL-"nemoretriever-ranking-ms:8000"} - APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/llama-3.2-nv-rerankqa-1b-v2"} + APP_RANKING_SERVERURL: ${APP_RANKING_SERVERURL-"nemotron-ranking-ms:8000"} + APP_RANKING_MODELNAME: ${APP_RANKING_MODELNAME:-"nvidia/llama-nemotron-rerank-1b-v2"} ENABLE_RERANKER: ${ENABLE_RERANKER:-True} ##===VLM Model specific configurations=== @@ -569,7 +569,7 @@ services: # Sample UI container which interacts with APIs exposed by rag-server container rag-frontend: container_name: rag-frontend - image: nvcr.io/nvstaging/blueprint/rag-frontend:${TAG:-2.4.0} + image: nvcr.io/nvidia/blueprint/rag-frontend:${TAG:-2.5.0} build: # Set context to repo's root directory context: ../../frontend diff --git a/deploy/workbench/quickstart.ipynb b/deploy/workbench/quickstart.ipynb index d9a15a71a..00c524aba 100644 --- a/deploy/workbench/quickstart.ipynb +++ b/deploy/workbench/quickstart.ipynb @@ -966,10 +966,10 @@ " \"enable_citations\": True,\n", " \"model\": \"nvidia/llama-3.3-nemotron-super-49b-v1.5\",\n", " \"llm_endpoint\": \"nim-llm:8000\",\n", - " \"embedding_model\": \"nvidia/llama-3.2-nv-embedqa-1b-v2\",\n", - " \"embedding_endpoint\": \"nemoretriever-embedding-ms:8000/v1\",\n", - " \"reranker_model\": \"nvidia/llama-3.2-nv-rerankqa-1b-v2\",\n", - " \"reranker_endpoint\": \"nemoretriever-ranking-ms:8000\",\n", + " \"embedding_model\": \"nvidia/llama-nemotron-embed-1b-v2\",\n", + " 
\"embedding_endpoint\": \"nemotron-embedding-ms:8000/v1\",\n", + " \"reranker_model\": \"nvidia/llama-nemotron-rerank-1b-v2\",\n", + " \"reranker_endpoint\": \"nemotron-ranking-ms:8000\",\n", " \"stop\": [],\n", "}\n", "\n", @@ -1030,10 +1030,10 @@ " \"enable_citations\": True,\n", " \"model\": \"nvidia/llama-3.3-nemotron-super-49b-v1.5\",\n", " \"llm_endpoint\": \"nim-llm:8000\",\n", - " \"embedding_model\": \"nvidia/llama-3.2-nv-embedqa-1b-v2\",\n", - " \"embedding_endpoint\": \"nemoretriever-embedding-ms:8000/v1\",\n", - " \"reranker_model\": \"nvidia/llama-3.2-nv-rerankqa-1b-v2\",\n", - " \"reranker_endpoint\": \"nemoretriever-ranking-ms:8000\",\n", + " \"embedding_model\": \"nvidia/llama-nemotron-embed-1b-v2\",\n", + " \"embedding_endpoint\": \"nemotron-embedding-ms:8000/v1\",\n", + " \"reranker_model\": \"nvidia/llama-nemotron-rerank-1b-v2\",\n", + " \"reranker_endpoint\": \"nemotron-ranking-ms:8000\",\n", " \"stop\": [],\n", "}\n", "\n", @@ -1175,10 +1175,10 @@ " ],\n", " \"enable_query_rewriting\": False,\n", " \"enable_reranker\": False,\n", - " \"embedding_model\": \"nvidia/llama-3.2-nv-embedqa-1b-v2\",\n", - " \"embedding_endpoint\": \"nemoretriever-embedding-ms:8000/v1\",\n", - " \"reranker_model\": \"nvidia/llama-3.2-nv-rerankqa-1b-v2\",\n", - " \"reranker_endpoint\": \"nemoretriever-ranking-ms:8000\",\n", + " \"embedding_model\": \"nvidia/llama-nemotron-embed-1b-v2\",\n", + " \"embedding_endpoint\": \"nemotron-embedding-ms:8000/v1\",\n", + " \"reranker_model\": \"nvidia/llama-nemotron-rerank-1b-v2\",\n", + " \"reranker_endpoint\": \"nemotron-ranking-ms:8000\",\n", "}\n", "\n", "\n", @@ -1233,10 +1233,10 @@ " ],\n", " \"enable_query_rewriting\": False,\n", " \"enable_reranker\": True,\n", - " \"embedding_model\": \"nvidia/llama-3.2-nv-embedqa-1b-v2\",\n", - " \"embedding_endpoint\": \"nemoretriever-embedding-ms:8000/v1\",\n", - " \"reranker_model\": \"nvidia/llama-3.2-nv-rerankqa-1b-v2\",\n", - " \"reranker_endpoint\": 
\"nemoretriever-ranking-ms:8000\",\n", + " \"embedding_model\": \"nvidia/llama-nemotron-embed-1b-v2\",\n", + " \"embedding_endpoint\": \"nemotron-embedding-ms:8000/v1\",\n", + " \"reranker_model\": \"nvidia/llama-nemotron-rerank-1b-v2\",\n", + " \"reranker_endpoint\": \"nemotron-ranking-ms:8000\",\n", "}\n", "\n", "\n", diff --git a/docs/accuracy-benchmarks.md b/docs/accuracy-benchmarks.md new file mode 100644 index 000000000..6ae3a4529 --- /dev/null +++ b/docs/accuracy-benchmarks.md @@ -0,0 +1,126 @@ + + +# Benchmarking RAG Accuracy: Evaluating LLM Reasoning and VLM Integration + +In the fast-moving world of Retrieval-Augmented Generation (RAG), the gap between a “good” system and one that’s truly production-ready often depends on how effectively the pipeline manages complex reasoning and multimodal data. To measure these advancements, our team conducted extensive benchmarks across multiple configurations, examining the influence of LLM reasoning (“Think” mode) and Vision-Language Models (VLM). + +## Benchmarked Datasets + +Our analysis centered on seven major public datasets encompassing a broad range of challenges, from financial reasoning to intricate structural document parsing. 
+ +| Dataset | Domain | Corpus Language | Main Modalities | # Pages | # Queries | +|---|---|---|---|---|---| +| [RagBattlepacket](https://www.eyelevel.ai/post/most-accurate-rag) | Finance, Tax & Consulting | English | Text, Tables, Charts, Infographics | 1,141 | 92 | +| [KG-RAG](https://github.com/docugami/KG-RAG-datasets/tree/main/sec-10-q/data/v1) | Finance (SEC 10-Q) | English | Text, Tables | 1,037 | 195 | +| [Financebench](https://github.com/patronus-ai/financebench) | Finance (Public Equity) | English | Text, Tables | 54,057 | 150 | +| [DC767](https://digitalcorpora.org/) | General (Gov, NGO, Health) | English | Text, Tables | 54,730 | 488 | +| [HotPotQA](https://huggingface.co/datasets/hotpotqa/hotpot_qa) | Wikipedia-based question-answer pairs | English | Text | 2,673 (txt files) | 979 | +| [Google Frames](https://huggingface.co/datasets/google/frames-benchmark) | History, Sports, Science, Animals, Health | English | Text | 31,708 | 824 | + +### [Vidore-V3 Dataset](https://huggingface.co/blog/QuentinJG/introducing-vidore-v3#public-datasets) + +| Dataset | Domain | Corpus Language | Main Modalities | # Pages | # Queries (with translations) | +|---|---|---|---|---|---| +| French Public Company Annual Reports | Finance-FR | French | Text, Table, Charts | 2,384 | 1,920 | +| U.S. 
Public Company Annual Reports | Finance-EN | English | Text, Table | 2,942 | 1,854 | +| Computer Science Textbooks | Computer Science | English | Text, Infographic, Tables | 1,360 | 1,290 | +| HR Reports from EU | HR | English | Text, Table, Charts | 1,110 | 1,908 | +| French Governmental Energy Reports | Energy | French | Text, Charts | 2,229 | 1,848 | +| USAF Technical Orders | Industrial | English | Text, Tables, Infographics, Images | 5,244 | 1,698 | +| FDA Reports | Pharmaceuticals | English | Text, Charts, Images, Infographic, Tables | 2,313 | 2,184 | +| French Physics Lectures | Physics | French | Text, Images, Infographics | 1,674 | 1,812 | + + +## Evaluation Methodology + +Our primary evaluation metric is end-to-end RAG answer accuracy, measured using the [NVIDIA Answer Accuracy metric from RAGAS](https://docs.ragas.io/en/stable/concepts/metrics/available_metrics/nvidia_metrics/). Each response is rated on a 0–4 scale by an LLM judge, with scores normalized to a 0–1 range for reporting. We chose [mistralai/Mixtral-8x22B-Instruct-v0.1](https://build.nvidia.com/mistralai/mixtral-8x22b-instruct) as the LLM judge, guided by performance on the [Judge’s Verdict](https://huggingface.co/spaces/nvidia/judges-verdict) benchmark. + +Full evaluation pipeline: [evaluation_01_ragas.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/evaluation_01_ragas.ipynb) + +- Metric: Accuracy, defined as the degree to which generated responses align with the ground truth answers. +- Pipeline configuration: All experiments were run using the default configuration.
+- Generation models: + - LLM: nvidia/llama-3.3-nemotron-super-49b-v1.5 + - VLM: nvidia/nemotron-nano-vl-12b-v2 +- Judge model: mistralai/Mixtral-8x22B-Instruct-v0.1 + +## Configuration and Accuracy Results + +We tested four main configurations to evaluate how ["Reasoning" (Think On)](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/docs/enable-nemotron-thinking.md) and ["Vision Language Model" (VLM)](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/docs/vlm.md) features influence accuracy. In the VLM-based generation pipeline, image captioning was enabled during data ingestion. For text-only datasets, we excluded the VLM-based generation setup from evaluation. + +| Dataset | LLM (Reasoning Off) | LLM (Reasoning On) | VLM (Reasoning Off) | VLM (Reasoning On) | +|---|---|---|---|---| +| FinanceBench | 0.612 | 0.668 | 0.622 | 0.697 | +| KG-RAG | 0.569 | 0.593 | 0.596 | 0.643 | +| RAGBattlePacket | 0.812 | 0.818 | 0.867 | 0.842 | +| DC767 | 0.906 | 0.899 | 0.907 | 0.897 | +| HotPotQA | 0.672 | 0.676 | n/a | n/a | +| Google Frames | 0.486 | 0.597 | n/a | n/a | + +The preceding table summarizes the accuracy scores for each dataset across our experimental configurations. + +### Vidore-V3 Results + +For the Vidore-v3 evaluation, we combined all domains into a single collection and then performed domain-specific evaluations. + +| Dataset subsets | LLM (Reasoning Off) | LLM (Reasoning On) | VLM (Reasoning Off) | VLM (Reasoning On) | +|---|---|---|---|---| +| Computer Science | 0.894 | 0.882 | 0.927 | 0.931 | +| Energy | 0.751 | 0.765 | 0.802 | 0.824 | +| Finance EN | 0.699 | 0.718 | 0.758 | 0.766 | +| Pharmaceuticals | 0.759 | 0.775 | 0.849 | 0.858 | +| HR | 0.726 | 0.735 | 0.767 | 0.804 | +| Industrial | 0.677 | 0.674 | 0.733 | 0.758 | +| Physics | 0.840 | 0.806 | 0.903 | 0.910 | +| Finance FR | 0.639 | 0.647 | 0.683 | 0.687 | + + +## Key Results + +The following sections describe the key results from our analysis.
+ +### The "Reasoning Dividend" in FinanceBench and KG-RAG + +For the FinanceBench and KG-RAG datasets, we observed improved accuracy with reasoning turned on. + +Why it makes sense + +- FinanceBench is heavily table-centric—about 75% of queries involve tables—and many of these require mathematical operations or extracting data across multiple line items. Simple retrieval is not sufficient; the model must perform an explicit reasoning step to carry out the necessary arithmetic and cross-referencing to match the human-annotated ground truth. + +- KG-RAG requires temporal reasoning (for example, comparing Q3 2022 with Q1 2023). Without reasoning enabled, the model may retrieve the correct company but the wrong fiscal quarter. Turning Reasoning On lets the LLM check dates and periods before finalizing its answer. + +### The Multimodal Unlock: Decoding Visual Complexity in ViDoRe and RAGBattlePacket + +Across both the ViDoRe benchmark and RAGBattlePacket, we saw the best results when moving from a text-only LLM to a VLM. RAGBattlePacket reached its highest baseline accuracy (0.867) simply by enabling the VLM, and ViDoRe showed broad gains across nearly all of its diverse sub-domains. + +Why it makes sense + +- Preserving Spatial Layouts (ViDoRe): Sub-domains like Finance and Pharmaceuticals depend on rigid tables and charts that text-only pipelines often fail to capture. A VLM can directly “see” and preserve these structures, leading to higher accuracy on this benchmark. +- Targeting Visual Queries (RAGBattlePacket): About 10% of RAGBattlePacket queries focus on charts, bar graphs, and customer journey diagrams, which standard pipelines often hallucinate on or ignore. A VLM can directly interpret these visuals, returning precise percentages and preserving the underlying structure. + +### Semantic Robustness in DC767 + +This dataset showed the highest overall stability, maintaining roughly 0.90 or higher accuracy across almost all configurations.
+ +Why it makes sense + +Because the dataset is about 70% text-based prose, it relies heavily on high-quality embeddings and semantic search. Our core retriever is clearly optimized for dense text retrieval, as adding Vision or Reasoning produced only a marginal gain (about a 1.1% change). This suggests that our base RAG engine is already very strong for standard retriever-focused tasks. + +### Reasoning as the Catalyst in Google Frames + +This dataset demonstrated the true impact of active reasoning on complex, multi-hop queries. By turning reasoning on, the model achieved a massive leap in overall performance. This gain represents our most significant improvement driven purely by logical processing. + +Why it makes sense + +Google Frames targets complex queries that require synthesizing facts across multiple documents while tracking overlapping constraints. A standard LLM often struggles to keep all these parameters in mind in a single pass. Turning on reasoning enables the model to systematically decompose multi-step logic and verify dependencies, which is essential for accurate factual extraction. + +## Related Topics + +- [Evaluate Your NVIDIA RAG Blueprint System](evaluate.md) +- [Enable Reasoning in Nemotron LLM Models](enable-nemotron-thinking.md) +- [VLM-Based Inferencing in RAG](vlm.md) +- [Image Captioning Support](image_captioning.md) +- [Best Practices for Common Settings](accuracy_perf.md) \ No newline at end of file diff --git a/docs/accuracy_perf.md b/docs/accuracy_perf.md index 8ee491fa6..c24f0c9a5 100644 --- a/docs/accuracy_perf.md +++ b/docs/accuracy_perf.md @@ -14,7 +14,7 @@ Change the setting if you want different behavior. | Name | Default | Description | Advantages | Disadvantages | |----------------------|------------|---------------------|----------------------|--------------------------| | `APP_NVINGEST_CHUNKOVERLAP` | `150` | Increase overlap to ensure smooth transitions between chunks. 
| - Larger overlap provides smoother transitions between chunks.
| - Might increase processing overhead.
| -| `APP_NVINGEST_CHUNKSIZE` | `512` | Increase chunk size for more context. | - Larger chunks retain more context, improving coherence.
| - Larger chunks increase embedding size, slowing retrieval.
- Longer chunks might increase latency due to larger prompt size.
| +| `APP_NVINGEST_CHUNKSIZE` | `512` | Increase chunk size for more context. | - Larger chunks retain more context, improving coherence.
- Larger chunks increase compute time for embedding creation.
- Larger chunks can lead to longer retrieved context, increasing generation latency.
- Very large chunks may dilute semantic focus, reducing embedding precision.
| | `APP_NVINGEST_ENABLEPDFSPLITTER` | `true` | Set to `true` to perform chunk-based splitting of pdfs after the default page-level extraction occurs. Recommended for PDFs that are mostly text content. | - Provides more granular content segmentation.
| - Can increase the number of chunks and slow down the ingestion process.
| | `APP_NVINGEST_EXTRACTCHARTS` | `true` | Set to `true` to extract charts. | - Improves accuracy for documents that contain charts.
| - Increases ingestion time.
| | `APP_NVINGEST_EXTRACTIMAGES` | `false` | Set to `true` to enable image captioning during ingestion. For details, refer to [Image Captioning Support](image_captioning.md). | - Enhances multimodal retrieval accuracy for documents having images.
| - Increased processing time during ingestion.
- Requires additional GPU resources for VLM model deployment.
| @@ -30,14 +30,14 @@ Change the setting if you want different behavior. | Name | Default | Description | Advantages | Disadvantages | |----------------------|------------|---------------------|----------------------|--------------------------| -| - `APP_LLM_MODELNAME`
- `APP_EMBEDDINGS_MODELNAME`
- `APP_RANKING_MODELNAME`
| See description | The default models are the following:
- `nvidia/llama-3.3-nemotron-super-49b-v1.5`
- `nvidia/llama-3.2-nv-embedqa-1b-v2`
- `nvidia/llama-3.2-nv-rerankqa-1b-v2`

You can use larger models. For details, refer to [Change the Inference or Embedding Model](change-model.md). | - Higher accuracy with better reasoning and a larger context length.
| - Slower response time.
- Higher inference cost.
- Higher GPU requirement.
| +| - `APP_LLM_MODELNAME`
- `APP_EMBEDDINGS_MODELNAME`
- `APP_RANKING_MODELNAME`
| See description | The default models are the following:
- `nvidia/llama-3.3-nemotron-super-49b-v1.5`
- `nvidia/llama-nemotron-embed-1b-v2`
- `nvidia/llama-nemotron-rerank-1b-v2`

You can use larger models. For details, refer to [Change the Inference or Embedding Model](change-model.md). | - Higher accuracy with better reasoning and a larger context length.
| - Slower response time.
- Higher inference cost.
- Higher GPU requirement.
| | `APP_VECTORSTORE_SEARCHTYPE` | `dense` | Set to `hybrid` to enable hybrid search. For details, refer to [Hybrid Search Support](hybrid_search.md). | - Can provide better retrieval accuracy for domain-specific content.
| - Can induce higher latency for large number of documents.
| | `ENABLE_GUARDRAILS` | `false` | Set to `true` to enable NeMo Guardrails. For details, refer to [Nemo Guardrails Support](nemo-guardrails.md). | - Applies input/output constraints for better safety and consistency.
| - Significant increased processing overhead for additional LLM calls.
- Needs additional GPUs to deploy guardrails-specific models locally.
| | `ENABLE_QUERYREWRITER` | `false` | Set to `true` to enable query rewriting. For details, refer to [Multi-Turn Conversation Support](multiturn.md). | - Enhances retrieval accuracy for multi-turn scenarios by rephrasing the query.
| - Adds an extra LLM call, increasing latency.
| | `ENABLE_REFLECTION` | `false` | Set to `true` to enable self-reflection. For details, refer to [Self-Reflection Support](self-reflection.md). | - Can improve the response quality by refining intermediate retrieval and final LLM output.
| - Significantly higher latency due to multiple iterations of LLM model call.
- You might need to deploy a separate judge LLM model, increasing GPU requirement.
| | `ENABLE_RERANKER` | `true` | Set to `true` to use the reranking model. | - Improves accuracy by selecting better documents for response generation.
| - Increases latency due to additional processing.
- Additional hardware requirements for self-hosted on premises deployment.
| | `ENABLE_VLM_INFERENCE` | `false` | Set to `true` to use the Vision-Language Model (VLM) for response generation. For details, refer to [VLM for Generation](vlm.md). | - Enables analysis of retrieved images alongside text for richer, multimodal responses.
- Can process up to 4 images per citation.
- Useful for document Q&A, visual search, and multimodal chatbots.
| - Requires additional GPU resources for VLM model deployment.
- Increases latency due to image processing.
| -| Reasoning in `llama-3.3-nemotron-super-49b-v1.5` | `/no_think` | Use `/think` to enable reasoning. For details, refer to [Enable Reasoning](enable-nemotron-thinking.md). | - Improves response quality through enhanced reasoning capabilities.
- Yields more precise responses. The default model is verbose and works best with reasoning enabled.
| - Can increase response latency due to additional thinking process.
- Can increase token usage and computational overhead.
| +| `LLM_ENABLE_THINKING` | `false` | Set to `true` to enable reasoning for Nemotron 3 models. Use `LLM_REASONING_BUDGET` and `LLM_LOW_EFFORT` for fine-grained control. For Nemotron 1.5 models, use the `/think` system prompt instead. For details, refer to [Enable Reasoning](enable-nemotron-thinking.md). | - Improves response quality through enhanced reasoning capabilities.
- Yields more precise responses.
| - Can increase response latency due to additional thinking process.
- Can increase token usage and computational overhead.
| | `RERANKER_SCORE_THRESHOLD` | `0.0` | Filters out retrieved chunks if reranker relevance is lower than this threshold. We recommend that you set this value between `0.3` and `0.5` to balance quality and coverage. For details, refer to [Use the Python Package](python-client.md). | - Faster retrieval by processing fewer documents.
- Can improve accuracy by excluding low-relevance documents.
| - Requires `ENABLE_RERANKER` set to `true` for effective filtering.
- Might filter out too many chunks if the threshold is set high, causing no response from the RAG server.
| | `RERANKER TOP K` | 10 | Increase `reranker TOP K` to increase the probability of relevant context being part of the top-k contexts. | Increasing the value can improve accuracy. | Increasing the value can increase latency. | | `VDB TOP K` | 100 | Increase `VDB TOP K` to provide a larger candidate pool for reranking. | Increasing the value can improve accuracy. | Increasing the value can increase latency. | diff --git a/docs/api-ingestor.md b/docs/api-ingestor.md index 443a521a6..adeeb4cf0 100644 --- a/docs/api-ingestor.md +++ b/docs/api-ingestor.md @@ -8,7 +8,7 @@ This documentation contains the OpenAPI reference for the ingestor server. :::{tip} -To view this documentation on docs.nvidia.com, go to https://docs.nvidia.com/rag/latest/api-ingestor.html. +To view this documentation on docs.nvidia.com, browse to [https://docs.nvidia.com/rag/latest/api-ingestor](https://docs.nvidia.com/rag/latest/api-ingestor.html). ::: @@ -41,7 +41,7 @@ The status response includes progress metrics updated after each batch completes For more granular progress updates during batch processing, use the `nv_ingest_status` object described below, which tracks individual document extraction progress and updates more frequently than the batch-level metrics. ::: -### NV-Ingest Extraction Status +### Extraction status The `/status` endpoint response includes an `nv_ingest_status` object that provides real-time document extraction progress, updating more frequently than batch-level metrics. 
This is useful for monitoring individual document processing when polling the status endpoint: @@ -53,7 +53,7 @@ The `/status` endpoint response includes an `nv_ingest_status` object that provi | Status | Description | |--------|-------------| | `not_started` | Document queued, extraction not yet initiated | -| `submitted` | Document submitted to NV-Ingest for processing | +| `submitted` | Document submitted to NeMo Retriever Library for processing | | `processing` | Document extraction is in progress | | `completed` | Document extraction completed successfully | | `failed` | Document extraction failed | diff --git a/docs/api-rag.md b/docs/api-rag.md index 366d44b0f..7a15d8890 100644 --- a/docs/api-rag.md +++ b/docs/api-rag.md @@ -8,8 +8,8 @@ This documentation contains the OpenAPI reference for the RAG server. :::{tip} -To view this documentation on docs.nvidia.com, go to https://docs.nvidia.com/rag/latest/api-rag.html. +To view this documentation on docs.nvidia.com, browse to [https://docs.nvidia.com/rag/latest/api-rag](https://docs.nvidia.com/rag/latest/api-rag.html). ::: 
:::{swagger-plugin} ../docs/api_reference/openapi_schema_rag_server.json diff --git a/docs/api_reference/openapi_schema_rag_server.json b/docs/api_reference/openapi_schema_rag_server.json index 63bbb4e33..5bcf2ec7d 100644 --- a/docs/api_reference/openapi_schema_rag_server.json +++ b/docs/api_reference/openapi_schema_rag_server.json @@ -707,7 +707,7 @@ "maxLength": 256, "title": "Embedding Model", "description": "Name of the embedding model used for vectorization.", - "default": "nvdev/nvidia/llama-3.2-nv-embedqa-1b-v2" + "default": "nvdev/nvidia/llama-nemotron-embed-1b-v2" }, "embedding_endpoint": { "type": "string", @@ -721,7 +721,7 @@ "maxLength": 256, "title": "Reranker Model", "description": "Name of the reranker model used for ranking results.", - "default": "nvidia/llama-3.2-nv-rerankqa-1b-v2" + "default": "nvidia/llama-nemotron-rerank-1b-v2" }, "reranker_endpoint": { "anyOf": [ @@ -1342,7 +1342,7 @@ "maxLength": 256, "title": "Embedding Model", "description": "Name of the embedding model used for vectorization.", - "default": "nvdev/nvidia/llama-3.2-nv-embedqa-1b-v2" + "default": "nvdev/nvidia/llama-nemotron-embed-1b-v2" }, "embedding_endpoint": { "anyOf": [ @@ -1363,7 +1363,7 @@ "maxLength": 256, "title": "Reranker Model", "description": "Name of the reranker model used for ranking results.", - "default": "nvidia/llama-3.2-nv-rerankqa-1b-v2" + "default": "nvidia/llama-nemotron-rerank-1b-v2" }, "reranker_endpoint": { "anyOf": [ diff --git a/docs/assets/perf-benchmarks/bo767_h100_performance.png b/docs/assets/perf-benchmarks/bo767_h100_performance.png new file mode 100644 index 000000000..05e202279 Binary files /dev/null and b/docs/assets/perf-benchmarks/bo767_h100_performance.png differ diff --git a/docs/assets/perf-benchmarks/cross_dataset_llm_reasoning_off.png b/docs/assets/perf-benchmarks/cross_dataset_llm_reasoning_off.png new file mode 100644 index 000000000..42eed61f9 Binary files /dev/null and 
b/docs/assets/perf-benchmarks/cross_dataset_llm_reasoning_off.png differ diff --git a/docs/assets/perf-benchmarks/hotpotqa_h100_performance.png b/docs/assets/perf-benchmarks/hotpotqa_h100_performance.png new file mode 100644 index 000000000..a4190fe68 Binary files /dev/null and b/docs/assets/perf-benchmarks/hotpotqa_h100_performance.png differ diff --git a/docs/assets/perf-benchmarks/kgrag_h100_performance.png b/docs/assets/perf-benchmarks/kgrag_h100_performance.png new file mode 100644 index 000000000..496b77500 Binary files /dev/null and b/docs/assets/perf-benchmarks/kgrag_h100_performance.png differ diff --git a/docs/assets/perf-benchmarks/ragbattlepacket_h100_performance.png b/docs/assets/perf-benchmarks/ragbattlepacket_h100_performance.png new file mode 100644 index 000000000..aaf068308 Binary files /dev/null and b/docs/assets/perf-benchmarks/ragbattlepacket_h100_performance.png differ diff --git a/docs/assets/perf-benchmarks/wikipedia_synthetic_h100_performance.png b/docs/assets/perf-benchmarks/wikipedia_synthetic_h100_performance.png new file mode 100644 index 000000000..29047d872 Binary files /dev/null and b/docs/assets/perf-benchmarks/wikipedia_synthetic_h100_performance.png differ diff --git a/docs/audio_ingestion.md b/docs/audio_ingestion.md index 55fcbc132..399ea7a64 100644 --- a/docs/audio_ingestion.md +++ b/docs/audio_ingestion.md @@ -132,7 +132,7 @@ When using Helm deployment, the Audio NIM service requires an additional GPU. The `APP_NVINGEST_SEGMENTAUDIO` environment variable controls whether audio segmentation is enabled during the ingestion process. -When set to `True`, NV-Ingest will segment audio files based on commas and other punctuation marks, resulting in more granular audio chunks. This can improve downstream processing and retrieval accuracy for audio content. Note that splitting on captions will occur regardless of this setting; enabling `APP_NVINGEST_SEGMENTAUDIO` simply adds additional segmentation based on punctuation. 
+When set to `True`, NeMo Retriever Library will segment audio files based on commas and other punctuation marks, resulting in more granular audio chunks. This can improve downstream processing and retrieval accuracy for audio content. Note that splitting on captions will occur regardless of this setting; enabling `APP_NVINGEST_SEGMENTAUDIO` simply adds additional segmentation based on punctuation. To enable audio segmentation, add the following export command to your environment configuration: diff --git a/docs/change-model.md b/docs/change-model.md index d0173462a..871d8f5f3 100644 --- a/docs/change-model.md +++ b/docs/change-model.md @@ -46,6 +46,10 @@ The `nemotron-3-nano-30b` model has different naming conventions depending on th Both names refer to the same underlying model. Use the appropriate name based on your deployment type. +##### Nemotron 3 Super + +Nemotron 3 Super is a larger model with different GPU and environment requirements: local NIM deployment requires at least 2 GPUs (FP8 TP2), and you may need a dedicated prompt config and reasoning settings. For full deployment steps (Docker and Helm), see the [Nemotron 3 Super deployment guide](nemotron3-super-deployment.md). + ### Change the Embedding Model @@ -77,7 +81,7 @@ Always use same embedding model or model having same tokinizers for both ingesti ### Configure Embedding Dimensions -The default embedding model (`nvidia/llama-3.2-nv-embedqa-1b-v2`) uses **2048 dimensions** by default. When changing to a different embedding model, you may need to update the dimensions to match the model's output. +The default embedding model (`nvidia/llama-nemotron-embed-1b-v2`) uses **2048 dimensions** by default. When changing to a different embedding model, you may need to update the dimensions to match the model's output. **Important:** Some embedding models have **fixed output dimensions** and do not accept a `dimensions` parameter. 
For example, `nvidia/nv-embedqa-e5-v5` always outputs 1024-dimensional embeddings. If you use such a model without configuring the dimensions, you may encounter an error like: @@ -124,13 +128,13 @@ You can specify the model for NVIDIA NIM containers to use in the [nims.yaml](.. image: nvcr.io/nim/: ... - nemoretriever-embedding-ms: - container_name: nemoretriever-embedding-ms + nemotron-embedding-ms: + container_name: nemotron-embedding-ms image: nvcr.io/nim/: - nemoretriever-ranking-ms: - container_name: nemoretriever-ranking-ms + nemotron-ranking-ms: + container_name: nemotron-ranking-ms image: nvcr.io/nim/: ``` @@ -173,11 +177,11 @@ Use this procedure to change models when you are running self-hosted NVIDIA NIM # === Embeddings === APP_EMBEDDINGS_MODELNAME: "" - APP_EMBEDDINGS_SERVERURL: "nemoretriever-embedding-ms:8000/v1" + APP_EMBEDDINGS_SERVERURL: "nemotron-embedding-ms:8000/v1" # === Reranker === APP_RANKING_MODELNAME: "" - APP_RANKING_SERVERURL: "nemoretriever-ranking-ms:8000" + APP_RANKING_SERVERURL: "nemotron-ranking-ms:8000" ``` 3. Configure the NIM microservices that host those models. Replace `:` with the image you selected (format `nvcr.io/nim/:`) in [values.yaml](../deploy/helm/nvidia-blueprint-rag/values.yaml). 
@@ -215,7 +219,7 @@ Use this procedure to change models when you are running self-hosted NVIDIA NIM enabled: true replicas: 1 service: - name: "nemoretriever-embedding-ms" + name: "nemotron-embedding-ms" image: # nvcr.io/nim/: repository: nvcr.io/nim/ @@ -237,7 +241,7 @@ Use this procedure to change models when you are running self-hosted NVIDIA NIM enabled: true replicas: 1 service: - name: "nemoretriever-ranking-ms" + name: "nemotron-ranking-ms" image: # nvcr.io/nim/: repository: nvcr.io/nim/ @@ -264,7 +268,19 @@ Use this procedure to change models when you are running self-hosted NVIDIA NIM **If only the vLLM profile is available** When only a vLLM profile is available for a model, such as on H100 and RTX GPUs, you must use the vLLM engine. First [run the list-model-profiles command](model-profiles.md#list-available-profiles) to confirm which profiles are available and then apply the following configurations. - + **For Nemotron Nano Models VLLM profile** + + When deploying `nvidia/nvidia-nemotron-nano-9b-v2` or `nvidia/nemotron-3-nano`, check whether a `tensorrt_llm` profile is available for your model by running the following command. 
+ + ```bash + # Change model name as needed + USERID=$(id -u) docker run --rm --gpus all \ + nvcr.io/nim/nvidia/nvidia-nemotron-nano-9b-v2:latest \ + list-model-profiles + ``` + + If only `vllm` profile is available, you must use the **vLLM engine** and add these specific configurations: + ```yaml nimOperator: nim-llm: @@ -292,4 +308,5 @@ Use this procedure to change models when you are running self-hosted NVIDIA NIM - [Deploy with Docker (Self-Hosted Models)](deploy-docker-self-hosted.md) - [Deploy with Docker (NVIDIA-Hosted Models)](deploy-docker-nvidia-hosted.md) - [Deploy with Helm](deploy-helm.md) +- [Nemotron 3 Super deployment (Docker and Helm)](nemotron3-super-deployment.md) - [Service-Specific API Keys](api-key.md#service-specific-api-keys) diff --git a/docs/change-vectordb.md b/docs/change-vectordb.md index a4dc993b8..36f4d4f9f 100644 --- a/docs/change-vectordb.md +++ b/docs/change-vectordb.md @@ -1001,7 +1001,7 @@ Update your [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) fil ### Disable Default Vector Database and Add Custom Helm Chart -1. **Disable Milvus in the NV-Ingest configuration:** +1. **Disable Milvus in the NeMo Retriever Library configuration:** ```yaml nv-ingest: enabled: true diff --git a/docs/conf.py b/docs/conf.py index f0ffa9e07..9c2a17c11 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2025-%Y, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,10 +23,10 @@ import os import sys -project = " NVIDIA-RAG-blueprint" -copyright = "2025, NVIDIA Corporation" +project = " NVIDIA RAG blueprint" +copyright = "2025-%Y, NVIDIA Corporation" author = "NVIDIA Corporation" -release = "2.4.0" +release = "2.5.0" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -74,8 +74,7 @@ "icon": "fa-brands fa-github", } ], - # Version switcher disabled: set "switcher": {"json_url": "...", "version_match": release} - # and ensure versions1.json is at the json_url path when using versioned doc deployments. + "switcher": {"json_url": "../versions1.json", "version_match": release}, "extra_head": { """ @@ -88,6 +87,7 @@ }, } + # Add any paths that contain custom static files (such as style sheets) here, html_css_files = ["swagger-nvidia.css"] diff --git a/docs/continuous-ingestion-object-storage.md b/docs/continuous-ingestion-object-storage.md new file mode 100644 index 000000000..4baa9daf7 --- /dev/null +++ b/docs/continuous-ingestion-object-storage.md @@ -0,0 +1,127 @@ + +# Continuous Ingestion from Object Storage RAG Blueprint + +Continuous ingestion from object storage connects the [RAG blueprint](readme.md) to an object storage bucket. This enables an event-driven pipeline that automatically indexes documents. Continuous ingestion means that when you add documents to a storage bucket, the system detects new uploads, routes them for processing, and indexes their content—making all data immediately searchable and available for analysis through the [RAG Frontend](user-interface.md). 
+ +## Hardware Requirements + +| Requirement | Details | +|-------------|---------| +| **GPU** | 2x RTX PRO 6000 Blackwell or 2x H100 | +| **OS** | Ubuntu 22.04 or later | +| **Docker** | Docker 24.0+ with Docker Compose v2 | +| **NVIDIA Driver** | 570+ | +| **NVIDIA Container Toolkit** | Required | + + +## Overview + +You can create an event-driven continuous ingestion pipeline that works as follows: + +1. Upload documents to object storage. + +2. The system detects new uploads via storage events and routes them for processing. + +3. Content is automatically indexed into the RAG vector store. + +4. You can then query the ingested content through the RAG UI or API. + +Continuous ingestion supports documents such as PDF, DOCX, and other formats supported by the [ingestor](api-ingestor.md). + +## Architecture + +The continuous ingestion architecture features the following high-level flow: + +1. Object storage: Files are written to storage using a protocol that emits events (for example, MinIO configured with Kafka notifications). + +2. Event trigger: Upload events are published to a Kafka topic. + +3. Consumer: A Kafka consumer subscribes to the topic, retrieves the events, downloads the corresponding files from object storage, and routes them for processing. + +4. Document path: Files are passed to a file-based processing pipeline (such as the NeMo Retriever Library or ingestor-server) and then indexed in the vector database. + +The continuous ingestion architecture follows the end-to-end sequence described above and can be summarized as: + +- Document ingestion flow: (1) → (2) → (3) → file-based processing → VectorDB → RAG Agent. + +## Implementation Components + +The reference implementation includes the following components: + +- Object storage (MinIO): A bucket configured with Kafka notifications on put (and optionally delete) events. + +- Kafka: A broker and topic (for example, aidp-topic) used to publish storage event notifications. 
+ +- Kafka consumer: A service that: + +  - Subscribes to the Kafka topic and consumes storage events. + +  - Downloads new objects from MinIO. + +  - Sends files to the RAG ingestor for indexing. + +The deployment is defined in `examples/rag_event_ingest/deploy/docker-compose.yaml`, which runs MinIO, Kafka, and the Kafka consumer on the same Docker network as the RAG stack (`nvidia-rag`). + +### Prerequisites + +- [Deploy the NVIDIA RAG Blueprint](deploy-docker-self-hosted.md) (NIMs, Milvus, ingestor-server, RAG server) so the consumer can reach the ingestor and the rest of the stack. +- Ensure the `nvidia-rag` Docker network exists (created by the RAG deployment). +- For the notebook, clone the repo, set `NGC_API_KEY`, and have the required hardware (see notebook for GPU and software requirements). + +### Option 1: Use the Notebook + +The notebook provides a guided walkthrough of the following steps: + +- Environment setup +- NVIDIA RAG deployment +- Continuous ingestion pipeline deployment (Kafka, MinIO, and consumer) +- Testing document uploads with RAG queries +- Cleanup + +To follow along, open and run: [rag_event_ingest.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_event_ingest.ipynb). + +### Option 2: Deploy the Example with Docker Compose + +From the repository root, after the RAG stack is up: + +```bash +docker compose -f examples/rag_event_ingest/deploy/docker-compose.yaml up -d +``` + +This command launches the following components: + +- Kafka (with an optional Kafka UI available on port 8080) +- MinIO (object storage and console using ports 9201 and 9211 in the example) +- Kafka consumer — connects to the ingestor at `INGESTOR_SERVER_URL` (default: `http://ingestor-server:8082`) and uses `COLLECTION_NAME` (default: `aidp_bucket`) + +After deployment, upload documents and query ingested content as follows: + +1. Open the MinIO Console UI at `http://:9211/login`. +2. 
Log in with the default credentials (`minioadmin` / `minioadmin`). +3. Navigate to the `aidp-bucket` bucket and upload your documents (PDF, DOCX, etc.). +4. The system automatically publishes upload events to Kafka, the consumer retrieves the files, and documents are sent to the ingestor for indexing into the `aidp_bucket` collection. +5. Query the ingested content through the RAG Frontend UI at `http://:8090` (select the `aidp_bucket` collection) or via the RAG API at `http://:8081/generate`. + +### Key Environment Variables + +The following environment variables configure the Kafka consumer. For details, refer to `examples/rag_event_ingest/deploy/docker-compose.yaml`. + +Consumer environment variables + +| Variable | Description | Default Value| +|----------|---------|--------| +| `KAFKA_BOOTSTRAP_SERVERS` | Address of the Kafka broker(s). | `kafka:9092` | +| `KAFKA_TOPIC` |Kafka topic used for object storage events. | `aidp-topic` | +| `MINIO_ENDPOINT` | MinIO endpoint in : format. | `minio-source-1:9000` | +| `INGESTOR_SERVER_URL` | Base URL for the RAG ingestor service. | `http://ingestor-server:8082` | +| `COLLECTION_NAME` | Target RAG collection for content indexing. 
| `aidp_bucket` | + +## Reference + +- [RAG Blueprint deployment (Docker self-hosted)](deploy-docker-self-hosted.md) +- [Ingestor API](api-ingestor.md) +- [Notebook: Document continuous ingestion from object storage](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_event_ingest.ipynb) +- [Example: `examples/rag_event_ingest/`](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/examples/rag_event_ingest/) — Kafka consumer and `deploy/docker-compose.yaml` diff --git a/docs/custom-metadata.md b/docs/custom-metadata.md index dba96e461..ce96b63ba 100644 --- a/docs/custom-metadata.md +++ b/docs/custom-metadata.md @@ -233,12 +233,12 @@ The system automatically manages certain metadata fields that are added to all c | Field Name | Type | Description | Auto-Populated | User Override | |------------|------|-------------|----------------|---------------| | **`filename`** | `string` | Name of the uploaded file | ✅ RAG system | ✅ Yes - define in schema | -| **`page_number`** | `integer` | Page number where content appears (1-indexed) | ✅ nv-ingest | ✅ Yes - define in schema | -| **`start_time`** | `integer` | Start timestamp in milliseconds for audio/video segments | ✅ nv-ingest | ✅ Yes - define in schema | -| **`end_time`** | `integer` | End timestamp in milliseconds for audio/video segments | ✅ nv-ingest | ✅ Yes - define in schema | +| **`page_number`** | `integer` | Page number where content appears (1-indexed) | ✅ NeMo Retriever Library | ✅ Yes - define in schema | +| **`start_time`** | `integer` | Start timestamp in milliseconds for audio/video segments | ✅ NeMo Retriever Library | ✅ Yes - define in schema | +| **`end_time`** | `integer` | End timestamp in milliseconds for audio/video segments | ✅ NeMo Retriever Library | ✅ Yes - define in schema | :::{note} -The following field names are **reserved** by NV-Ingest and cannot be used in custom metadata schemas: `type`, `subtype`, and `location`. 
These fields are exclusively managed by NV-Ingest during document processing and attempting to use them will result in a validation error. +The following field names are **reserved** by NeMo Retriever Library and cannot be used in custom metadata schemas: `type`, `subtype`, and `location`. These fields are exclusively managed by NeMo Retriever Library during document processing and attempting to use them will result in a validation error. ::: #### System-Managed Field Behavior @@ -246,7 +246,7 @@ The following field names are **reserved** by NV-Ingest and cannot be used in cu - **Auto-Addition**: These fields are automatically added to your collection schema if you don't define them - **Auto-Population**: - `filename` is populated by the RAG system during ingestion - - `page_number`, `start_time`, `end_time` are extracted and populated by nv-ingest during document processing + - `page_number`, `start_time`, `end_time` are extracted and populated by NeMo Retriever Library during document processing - **User Override**: You can define any of these fields in your schema with custom properties (e.g., different description, constraints) - If you provide a definition, your definition takes priority - If you don't provide a definition, the system auto-adds them with default settings @@ -258,7 +258,7 @@ The following field names are **reserved** by NV-Ingest and cannot be used in cu :::{note} **Example**: If you upload a multi-page PDF without defining `page_number` in your schema, the system will: 1. Automatically add the `page_number` field to your collection schema -2. nv-ingest will extract the page number from each chunk during processing +2. NeMo Retriever Library extracts the page number from each chunk during processing 3. The page number will be available for filtering (e.g., `content_metadata["page_number"] == 5`) 4. 
The page number will appear in citations when generating responses ::: diff --git a/docs/debugging.md b/docs/debugging.md index 66a580419..1fb5aea0d 100644 --- a/docs/debugging.md +++ b/docs/debugging.md @@ -33,7 +33,7 @@ docker logs -f nim-llm-ms watch -n 10 'du -sh ~/.cache/model-cache/' # Check specific container resource usage -docker stats nim-llm-ms nemoretriever-embedding-ms nemoretriever-ranking-ms +docker stats nim-llm-ms nemotron-embedding-ms nemotron-ranking-ms ``` The expected timeline for Docker (Self-Hosted) deployment is the following: @@ -124,12 +124,12 @@ docker ps | grep -E "(ingestor-server|nv-ingest|nemoretriever-embedding|milvus|r milvus-standalone Up 36 minutes (healthy) milvus-minio Up 35 minutes (healthy) milvus-etcd Up 35 minutes (healthy) - nemoretriever-ranking-ms Up 38 minutes (healthy) + nemotron-ranking-ms Up 38 minutes (healthy) compose-page-elements-1 Up 38 minutes compose-nemoretriever-ocr-1 Up 38 minutes compose-graphic-elements-1 Up 38 minutes compose-table-structure-1 Up 38 minutes - nemoretriever-embedding-ms Up 38 minutes (healthy) + nemotron-embedding-ms Up 38 minutes (healthy) nim-llm-ms Up 38 minutes (healthy) ``` @@ -141,7 +141,7 @@ docker ps | grep -E "(ingestor-server|nv-ingest|nemoretriever-embedding|milvus|r # Check ingestor server health with all dependencies curl -X GET "http://localhost:8082/v1/health?check_dependencies=true" | jq -# Verify NV-Ingest runtime is ready for processing +# Verify NeMo Retriever Library runtime is ready for processing curl -X GET "http://localhost:7670/v1/health/ready" # Check embedding service is responding @@ -219,11 +219,11 @@ Start by examining the logs of key ingestion services to identify the specific e # Check ingestor server logs for API errors docker logs ingestor-server --tail 100 -# Check NV-Ingest runtime logs for processing errors +# Check NeMo Retriever Library runtime logs for processing errors docker logs nv-ingest-ms-runtime --tail 100 # Check embedding service logs for 
model issues -docker logs nemoretriever-embedding-ms --tail 100 +docker logs nemotron-embedding-ms --tail 100 ``` ### 2. Common Ingestion Problems and Solutions @@ -245,15 +245,15 @@ docker logs milvus-standalone --tail 50 **Embedding Service Issues:** ```bash # Check embedding service logs -docker logs nemoretriever-embedding-ms --tail 100 +docker logs nemotron-embedding-ms --tail 100 # Verify GPU availability and memory nvidia-smi ``` -**NV-Ingest Processing Errors:** +**NeMo Retriever Library Processing Errors:** ```bash -# Check NV-Ingest logs for processing errors +# Check NeMo Retriever Library logs for processing errors docker logs nv-ingest-ms-runtime --tail 200 | grep -i error # Check Redis connectivity for task queue @@ -288,7 +288,7 @@ docker logs rag-server --tail 100 docker logs nim-llm-ms --tail 100 # Check ranking service logs for reranking errors -docker logs nemoretriever-ranking-ms --tail 100 +docker logs nemotron-ranking-ms --tail 100 ``` ### 2. Common Retrieval Problems and Solutions diff --git a/docs/deploy-docker-nvidia-hosted.md b/docs/deploy-docker-nvidia-hosted.md index 4487edff3..2aabd06ce 100644 --- a/docs/deploy-docker-nvidia-hosted.md +++ b/docs/deploy-docker-nvidia-hosted.md @@ -111,7 +111,7 @@ Use the following procedure to start all containers needed for this blueprint. ], "processing": [ { - "service": "NV-Ingest", + "service": "NeMo Retriever Library", "status": "healthy", ... } @@ -238,7 +238,7 @@ After the first time you deploy the RAG Blueprint successfully, you can consider - If you don't have a GPU available, you can switch to CPU-only Milvus by following the instructions in [milvus-configuration.md](./milvus-configuration.md). -- If you have a requirement to build the NVIDIA Ingest runtime container from source, you can do it by following instructions [here](https://github.com/NVIDIA/nv-ingest). 
+- If you have a requirement to build the NeMo Retriever Library runtime container from source, you can do it by following instructions [here](https://github.com/NVIDIA/NeMo-Retriever). diff --git a/docs/deploy-docker-self-hosted.md b/docs/deploy-docker-self-hosted.md index 0efe64ea6..4913be36a 100644 --- a/docs/deploy-docker-self-hosted.md +++ b/docs/deploy-docker-self-hosted.md @@ -110,7 +110,7 @@ Use the following procedure to start all containers needed for this blueprint. USERID=$(id -u) docker compose -f deploy/compose/nims.yaml up -d ``` -5. Check the status of the deployment by running the following code. Wait until all services are up and the `nemoretriever-ranking-ms`, `nemoretriever-embedding-ms` and `nim-llm-ms` NIMs are in healthy state before proceeding further. +5. Check the status of the deployment by running the following code. Wait until all services are up and the `nemotron-ranking-ms`, `nemotron-embedding-ms` and `nim-llm-ms` NIMs are in healthy state before proceeding further. ```bash watch -n 2 'docker ps --format "table {{.Names}}\t{{.Status}}"' @@ -121,10 +121,10 @@ Use the following procedure to start all containers needed for this blueprint. NAMES STATUS nim-llm-ms Up 4 minutes (healthy) - nemoretriever-ranking-ms Up 4 minutes (healthy) + nemotron-ranking-ms Up 4 minutes (healthy) compose-graphic-elements-1 Up 4 minutes compose-page-elements-1 Up 4 minutes - nemoretriever-embedding-ms Up 4 minutes (healthy) + nemotron-embedding-ms Up 4 minutes (healthy) compose-nemoretriever-ocr-1 Up 4 minutes compose-table-structure-1 Up 4 minutes ``` @@ -174,7 +174,7 @@ Use the following procedure to start all containers needed for this blueprint. ], "processing": [ { - "service": "NV-Ingest", + "service": "NeMo Retriever Library", "status": "healthy", ... } @@ -253,10 +253,10 @@ Use the following procedure to start all containers needed for this blueprint. 
340bc8210a0d milvus-minio Up 3 minutes (healthy) 0be702b87ad6 milvus-etcd Up 3 minutes (healthy) 62eabf1d9f65 nim-llm-ms Up 10 minutes (healthy) - fe2751bfa734 nemoretriever-ranking-ms Up 10 minutes (healthy) + fe2751bfa734 nemotron-ranking-ms Up 10 minutes (healthy) 7b5ddabf8be7 compose-graphic-elements-1 Up 10 minutes ecfaa5190302 compose-page-elements-1 Up 10 minutes - ea8c7fdf20d1 nemoretriever-embedding-ms Up 10 minutes (healthy) + ea8c7fdf20d1 nemotron-embedding-ms Up 10 minutes (healthy) 6d62008a9b42 compose-nemoretriever-ocr-1 Up 10 minutes 969b9f5c987c compose-table-structure-1 Up 10 minutes ``` @@ -333,11 +333,11 @@ After the first time you deploy the RAG Blueprint successfully, you can consider - For improved accuracy, consider enabling reasoning mode. For details, refer to [Enable thinking](./enable-nemotron-thinking.md). -- NeMo Retriever OCR is now the default OCR service. To use legacy Paddle OCR instead, refer to [OCR Configuration Guide](nemoretriever-ocr.md). +- NeMo Retriever Library OCR is now the default OCR service. To use legacy Paddle OCR instead, refer to [OCR Configuration Guide](nemoretriever-ocr.md). - For advanced users who need direct filesystem access to extraction results, refer to [Ingestor Server Volume Mounting](mount-ingestor-volume.md). -- A single NVIDIA A100-80GB or H100-80GB, B200 GPU can be used to start non-LLM NIMs (nemoretriever-embedding-ms, nemoretriever-ranking-ms, and ingestion services like page-elements, ocr, graphic-elements, and table-structure) for ingestion and RAG workflows. You can control which GPU is used for each service by setting these environment variables in `deploy/compose/.env` file before launching. For a complete list of all services and their default GPU assignments, see [Service Port and GPU Reference](service-port-gpu-reference.md). 
+- A single NVIDIA A100-80GB or H100-80GB, B200 GPU can be used to start non-LLM NIMs (nemotron-embedding-ms, nemotron-ranking-ms, and ingestion services like page-elements, ocr, graphic-elements, and table-structure) for ingestion and RAG workflows. You can control which GPU is used for each service by setting these environment variables in `deploy/compose/.env` file before launching. For a complete list of all services and their default GPU assignments, see [Service Port and GPU Reference](service-port-gpu-reference.md). ```bash EMBEDDING_MS_GPU_ID=0 diff --git a/docs/deploy-helm-from-repo.md b/docs/deploy-helm-from-repo.md index 04c39829a..e57c9ea26 100644 --- a/docs/deploy-helm-from-repo.md +++ b/docs/deploy-helm-from-repo.md @@ -14,7 +14,7 @@ The following are the core services that you install: - RAG server - Ingestor server -- NV-Ingest +- NeMo Retriever Library ## Prerequisites diff --git a/docs/deploy-helm.md b/docs/deploy-helm.md index bf8e792c5..18940aaf6 100644 --- a/docs/deploy-helm.md +++ b/docs/deploy-helm.md @@ -14,7 +14,7 @@ The following are the core services that you install: - RAG server - Ingestor server -- NV-Ingest +- NeMo Retriever Library ## Prerequisites @@ -37,7 +37,7 @@ Plan for additional space if you are enabling persistence for multiple services. 4. Verify that you have Kubernetes v1.34.2 installed and running on Ubuntu 22.04/24.04. For more information, see [Kubernetes documentation](https://kubernetes.io/docs/setup/) and [NVIDIA Cloud Native Stack](https://github.com/NVIDIA/cloud-native-stack). -5. Verify that you have installed Helm 3. To install Helm 3 (and avoid Helm 4), follow the official Helm v3 installation instructions for your platform, for example by using the `get-helm-3` script described in the [Helm documentation](https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3). +5. Verify that you have installed Helm 3. 
To install Helm 3 (and avoid Helm 4), follow the official Helm v3 installation instructions for your platform, for example by using the `get-helm-3` script described in the [Helm documentation](https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3). 6. Verify that you have a default storage class available in the cluster for PVC provisioning. One option is the local path provisioner by Rancher. Refer to the [installation](https://github.com/rancher/local-path-provisioner?tab=readme-ov-file#installation) section of the README in the GitHub repository. @@ -87,7 +87,7 @@ To deploy End-to-End RAG Server and Ingestor Server, use the following procedure 2. Install the Helm chart by running the following command. ```sh - helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ + helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --set imagePullSecret.password=$NGC_API_KEY \ @@ -112,7 +112,7 @@ To deploy End-to-End RAG Server and Ingestor Server, use the following procedure Then install using the modified values.yaml: ```sh - helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ + helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --set imagePullSecret.password=$NGC_API_KEY \ @@ -125,6 +125,8 @@ To deploy End-to-End RAG Server and Ingestor Server, use the following procedure Refer to [NIM Model Profile Configuration](model-profiles.md) for using non-default NIM LLM profile. ::: + For **Nemotron 3 Super** on Helm, see the [Nemotron 3 Super deployment guide](nemotron3-super-deployment.md#helm-deployment-nemotron-3-super). 
+ ## Verify a Deployment @@ -146,11 +148,11 @@ To verify a deployment, use the following procedure. NAME READY STATUS RESTARTS AGE ingestor-server-6cc886bcdf-6rfwm 1/1 Running 0 54m milvus-standalone-7dd5db4755-ctqzg 1/1 Running 0 54m - nemoretriever-embedding-ms-86f75c8f65-dfhd2 1/1 Running 0 39m + nemotron-embedding-ms-86f75c8f65-dfhd2 1/1 Running 0 39m nemoretriever-graphic-elements-v1-67d9d65bdc-ftbkw 1/1 Running 0 33m nemoretriever-ocr-v1-78f56cddb9-f4852 1/1 Running 0 40m nemoretriever-page-elements-v3-56ddcf9b4b-qsg82 1/1 Running 0 49m - nemoretriever-ranking-ms-5ff774889f-fwrlm 1/1 Running 0 40m + nemotron-ranking-ms-5ff774889f-fwrlm 1/1 Running 0 40m nemoretriever-table-structure-v1-696c9f5665-l9sxn 1/1 Running 0 37m nim-llm-7cb9bdcc89-hwpkq 1/1 Running 0 11m nim-llm-cache-job-77hpc 0/1 Completed 0 94s @@ -209,11 +211,11 @@ To verify a deployment, use the following procedure. NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE ingestor-server ClusterIP 10.107.12.217 8082/TCP 54m milvus ClusterIP 10.99.110.203 19530/TCP,9091/TCP 54m - nemoretriever-embedding-ms ClusterIP 10.104.99.15 8000/TCP,8001/TCP 54m + nemotron-embedding-ms ClusterIP 10.104.99.15 8000/TCP,8001/TCP 54m nemoretriever-graphic-elements-v1 ClusterIP 10.96.115.45 8000/TCP,8001/TCP 54m nemoretriever-ocr-v1 ClusterIP 10.100.107.215 8000/TCP,8001/TCP 54m nemoretriever-page-elements-v3 ClusterIP 10.102.237.196 8000/TCP,8001/TCP 54m - nemoretriever-ranking-ms ClusterIP 10.96.114.244 8000/TCP,8001/TCP 54m + nemotron-ranking-ms ClusterIP 10.96.114.244 8000/TCP,8001/TCP 54m nemoretriever-table-structure-v1 ClusterIP 10.107.227.139 8000/TCP,8001/TCP 54m nim-llm ClusterIP 10.104.60.155 8000/TCP,8001/TCP 54m rag-etcd ClusterIP 10.104.74.116 2379/TCP,2380/TCP 54m @@ -250,7 +252,7 @@ Port-forwarding is provided as a quick method to try out the UI. However, large To change an existing deployment, after you modify the [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) file, run the following code. 
```sh -helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ +helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --set imagePullSecret.password=$NGC_API_KEY \ diff --git a/docs/documentation.md b/docs/documentation.md index de15a9254..53b943dfa 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -8,6 +8,9 @@ - [Documentation Development](#documentation-development) - [Build the Documentation](#build-the-documentation) - [Live Building](#live-building) + - [Documentation Version](#documentation-version) + - [Publishing multiple versions on the public site](#publishing-multiple-versions-on-the-public-site) + - [Multi-version build script](#multi-version-build-script) ## Build the Documentation @@ -40,4 +43,46 @@ The three files below control the version switcher. Before you attempt to publis * docs/versions1.json * docs/project.json -* docs/conf.py \ No newline at end of file +* docs/conf.py + +Validate the manifest and that `release` matches `project.json` before building: + +```sh +uv run python docs/scripts/verify_doc_version_manifest.py +``` + +### Publishing multiple versions on the public site + +Use the **same** `docs/versions1.json` content for every release line you build (list every published version; `preferred` should be `true` only for the default, usually the latest). On each **release branch or tag**, set `release` in `conf.py` and `version` in `project.json` to that line’s version (for example `2.4.0` on the `2.4.x` branch), then build: + +```sh +uv run --group docs sphinx-build . _build/html +``` + +Deploy the HTML so each line lives as a **sibling** folder, for example `2.3.0/`, `2.4.0/`, `2.5.0/`. The theme resolves `../versions1.json` from the version **index** page to a file **next to** those folders (the parent directory). 
Copy the same `docs/versions1.json` to that parent as `versions1.json` when you publish, or ensure your pipeline deploys it there once per release. If you add a version to the manifest, rebuild (or redeploy) each affected tree and refresh the root `versions1.json`; invalidate CDN cache if the menu still looks stale. + +### Multi-version build script + +From the repository root, you can build several release lines into one tree: `docs/_build/multiversion/{version}/` plus a root `versions1.json`. The script reads your current `docs/versions1.json` as the canonical manifest, then for each version checks out git tag `v{version}` if it exists, otherwise branch `release-v{version}`, writes that manifest into `docs/versions1.json`, runs the verifier, and runs Sphinx. Your original `HEAD` is restored at the end. + +Preview which refs will be used (no git or build): + +```powershell +.\docs\scripts\build_multiversion_docs.ps1 -DryRun +``` + +Full build (requires a clean working tree, or pass `-AllowDirty`): + +```powershell +.\docs\scripts\build_multiversion_docs.ps1 -Versions @('2.3.0','2.4.0','2.5.0') +``` + +On Linux or macOS: + +```sh +chmod +x docs/scripts/build_multiversion_docs.sh +./docs/scripts/build_multiversion_docs.sh --dry-run +./docs/scripts/build_multiversion_docs.sh --versions 2.3.0,2.4.0,2.5.0 +``` + +Serve the result locally, for example: `python -m http.server 8080 --directory docs/_build/multiversion` and open `http://localhost:8080/2.5.0/` to confirm the switcher. 
\ No newline at end of file diff --git a/docs/enable-nemotron-thinking.md b/docs/enable-nemotron-thinking.md index 182a09ea1..dc95b4285 100644 --- a/docs/enable-nemotron-thinking.md +++ b/docs/enable-nemotron-thinking.md @@ -19,9 +19,106 @@ This guide explains how to enable reasoning for different Nemotron models, each | Model | Control Method | Thinking Budget Parameters | |-------|----------------|----------------------------| +| Nemotron 3 (Nano 30B, and others) | Environment variables | `LLM_ENABLE_THINKING`, `LLM_REASONING_BUDGET`, `LLM_LOW_EFFORT` | | Nemotron 1.5 | System prompts | None | | Nemotron-3-Nano 9B | System prompts | min/max thinking tokens | -| Nemotron-3-Nano 30B | Environment variable | max thinking tokens only | + +## Enable Reasoning for Nemotron 3 Models + +Nemotron 3 models (such as `nvidia/nemotron-3-nano-30b-a3b`) use environment variables to control reasoning. + +Set the following environment variables on the RAG server container (via Docker Compose, Helm values, or shell export): + +**`LLM_ENABLE_THINKING`** +: Enable or disable the reasoning phase. When `true`, the model emits reasoning tokens before the final answer. Default: `false`. + +**`LLM_REASONING_BUDGET`** +: Maximum number of tokens allocated for reasoning. Only used when `LLM_ENABLE_THINKING` is `true`. Default: `0`. + +**`LLM_LOW_EFFORT`** +: Low-effort reasoning mode for faster, cheaper responses with shorter reasoning. Only used when `LLM_ENABLE_THINKING` is `true`. Default: `false`. + +**`FILTER_THINK_TOKENS`** +: Filter content between `<think>` and `</think>` tags in model responses. Keep `true` for production to return only the final answer. Set `false` to see the full reasoning process. Default: `true`. + +:::{important} +**Disabling reasoning:** To disable reasoning, set **`LLM_ENABLE_THINKING=false`**.
Setting `LLM_REASONING_BUDGET=0` alone does not disable reasoning: when the budget is `0`, the RAG pipeline does not pass it to the LLM, and the model uses its default reasoning behavior. Always set `LLM_ENABLE_THINKING=false` to turn reasoning off. +::: + +## Nemotron 3 Reasoning Configuration Examples + +Nemotron 3 models (such as `nvidia/nemotron-3-super-120b-a12b` and `nvidia/nemotron-3-nano-30b-a3b`) use environment variables to control reasoning. + +### Basic Configuration + +```bash +export LLM_ENABLE_THINKING=true +``` + +### Configure Reasoning Budget (Optional) + +Limit the number of reasoning tokens to control latency and cost: + +```bash +export LLM_ENABLE_THINKING=true +export LLM_REASONING_BUDGET=8192 +``` + +### Low-Effort Mode (Optional) + +For faster responses where deep reasoning is unnecessary: + +```bash +export LLM_ENABLE_THINKING=true +export LLM_LOW_EFFORT=true +``` + +### Configure Model Parameters + +After you enable reasoning, configure the model parameters for optimal reasoning performance: + +```bash +export LLM_TEMPERATURE=0.6 +export LLM_TOP_P=0.95 +``` + +### Nemotron-3-Nano 30B + +For `nvidia/nemotron-3-nano-30b-a3b`, reasoning is controlled with the same `LLM_ENABLE_THINKING` variable. The reasoning budget can be set with either `LLM_REASONING_BUDGET` or `LLM_MAX_THINKING_TOKENS`: + +```bash +export LLM_ENABLE_THINKING=true +export LLM_REASONING_BUDGET=8192 +``` + +The 30B model also supports a maximum thinking token limit directly in API requests: + +```json +{ + "model": "nvidia/nemotron-3-nano-30b-a3b", + "messages": [ + { + "role": "user", + "content": "What is the capital of France?" + } + ], + "max_thinking_tokens": 8192 +} +``` + +**Thinking budget parameters:** + +**`max_thinking_tokens`** +: Maximum number of reasoning tokens allowed before generating the final answer.
+ +:::{important} +The key differences for the 30B model are the following: + +- Uses only `max_thinking_tokens` (not `min_thinking_tokens`) +- Reasoning is available in the model output's `reasoning_content` field (not wrapped in `<think>` tags) +- The `reasoning_content` field is present in the model output but isn't exposed in the generate API response +- No filtering is needed because reasoning is already separated from the final answer +::: ## Enable Reasoning for Nemotron 1.5 @@ -81,7 +178,7 @@ export FILTER_THINK_TOKENS=false For most production use cases, keep `FILTER_THINK_TOKENS=true` (default) to provide cleaner responses to end users. ::: -## Enable Reasoning for Nemotron-3-Nano 9B +## Enable Reasoning for Nemotron Nano 9B The `nvidia/nvidia-nemotron-nano-9b-v2` model uses system prompts to control reasoning similar to Nemotron 1.5. It also adds support for thinking budget parameters to control the extent of reasoning. @@ -132,63 +229,6 @@ The key differences for the 9B model are the following: - No filtering is needed because reasoning is already separated from the final answer ::: -## Enable Reasoning for Nemotron-3-Nano 30B - -The `nvidia/nemotron-3-nano-30b-a3b` model uses a different approach for reasoning control. Instead of system prompts, you control reasoning through an environment variable. - -### Enable Reasoning Through an Environment Variable - -Set the environment variable to enable or disable reasoning: - -```bash -# Enable reasoning (default) -export ENABLE_NEMOTRON_3_NANO_THINKING=true - -# Disable reasoning -export ENABLE_NEMOTRON_3_NANO_THINKING=false -``` - -### Configure Thinking Budget (Optional) - -The 30B model supports a maximum thinking token limit to control the reasoning phase: - -```json -{ - "model": "nvidia/nemotron-3-nano-30b-a3b", - "messages": [ - { - "role": "user", - "content": "What is the capital of France?"
- } - ], - "max_thinking_tokens": 8192 -} -``` - -**Thinking budget parameters:** - -**`max_thinking_tokens`** -: Maximum number of reasoning tokens allowed before generating the final answer. - -:::{important} -The key differences for the 30B model are the following: - -- Uses only `max_thinking_tokens` (not `min_thinking_tokens`) -- Reasoning is available in the model output's `reasoning_content` field (not wrapped in `` tags) -- The `reasoning_content` field is present in the model output but isn't exposed in the generate API response -- No filtering is needed because reasoning is already separated from the final answer -::: - -### Model Naming - -Use the correct model name based on your deployment: - -**Locally deployed NIMs** -: `nvidia/nemotron-3-nano` - -**NVIDIA-hosted models** -: `nvidia/nemotron-3-nano-30b-a3b` - ## Deploy with Reasoning Enabled After you configure reasoning settings in `prompt.yaml` or environment variables, redeploy your services: @@ -220,6 +260,7 @@ Adjust the thinking budget based on your use case: - **Lower values (1024-4096)**: Faster responses for simpler questions - **Higher values (8192-16384)**: More thorough reasoning for complex queries +- **Low-effort mode**: Use `LLM_LOW_EFFORT=true` for fast, low-cost reasoning when deep thought is not required ::: ## Related Topics diff --git a/docs/evaluate.md b/docs/evaluate.md index 0d83e83b0..2485682dc 100644 --- a/docs/evaluate.md +++ b/docs/evaluate.md @@ -7,6 +7,8 @@ After you [deploy your NVIDIA RAG Blueprint system](readme.md#deployment-options-for-rag-blueprint), you can evaluate it by using [Ragas](https://docs.ragas.io/en/stable/) metrics specifically designed for Large Language Model (LLM) Applications. +For published benchmark results across multiple datasets and configurations, refer to [RAG Accuracy Benchmarks](accuracy-benchmarks.md). 
+ ## Ragas Metrics @@ -36,3 +38,4 @@ For more information, refer to the notebook [Evaluate Your RAG Pipeline with Rag - [NVIDIA RAG Blueprint Documentation](readme.md) - [Get Started](deploy-docker-self-hosted.md) - [Notebooks](notebooks.md) +- [RAG Accuracy Benchmarks](accuracy-benchmarks.md) diff --git a/docs/index.md b/docs/index.md index 941b2f9d8..fc5df2ecf 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,6 @@ # NVIDIA RAG Blueprint Documentation @@ -81,6 +82,7 @@ After you deploy the RAG blueprint, you can customize it for your use cases. - Data Ingestion & Processing - [Audio Ingestion Support](audio_ingestion.md) + - [Continuous Ingestion from Object Storage](continuous-ingestion-object-storage.md) - [Custom Metadata Support](custom-metadata.md) - [File System Access to Extraction Results](mount-ingestor-volume.md) - [Multimodal Embedding Support (Early Access)](vlm-embed.md) @@ -110,6 +112,8 @@ After you deploy the RAG blueprint, you can customize it for your use cases. - Evaluation - [Evaluate Your NVIDIA RAG Blueprint System](evaluate.md) + - [RAG Accuracy Benchmarks](accuracy-benchmarks.md) + - [RAG Performance Benchmarks](perf-benchmarks.md) - Governance @@ -141,7 +145,7 @@ After you deploy the RAG blueprint, you can customize it for your use cases. ## Blog Posts -- [NVIDIA NeMo Retriever Delivers Accurate Multimodal PDF Data Extraction 15x Faster](https://developer.nvidia.com/blog/nvidia-nemo-retriever-delivers-accurate-multimodal-pdf-data-extraction-15x-faster/) +- [NVIDIA NeMo Retriever Library Delivers Accurate Multimodal PDF Data Extraction 15x Faster](https://developer.nvidia.com/blog/nvidia-nemo-retriever-delivers-accurate-multimodal-pdf-data-extraction-15x-faster/) - [Finding the Best Chunking Strategy for Accurate AI Responses](https://developer.nvidia.com/blog/finding-the-best-chunking-strategy-for-accurate-ai-responses/) @@ -177,6 +181,7 @@ After you deploy the RAG blueprint, you can customize it for your use cases. 
:maxdepth: 1 :hidden: + Deploy with Docker (Self-Hosted Models) Deploy with Docker (NVIDIA-Hosted Models) Deploy on Kubernetes with Helm Deploy on Kubernetes with Helm from the repository @@ -211,13 +216,14 @@ After you deploy the RAG blueprint, you can customize it for your use cases. :hidden: Audio Ingestion Support + Continuous Ingestion from Object Storage Custom metadata Support Data Catalog for Collections and Documents File System Access to Results Multimodal Embedding Support (Early Access) OCR Configuration Guide Enhanced PDF Extraction - Standalone NV-Ingest + Standalone NeMo Retriever Library Text-Only Ingestion MCP Server Usage ``` @@ -255,6 +261,8 @@ After you deploy the RAG blueprint, you can customize it for your use cases. :hidden: Evaluate Your RAG System + RAG Accuracy Benchmarks + RAG Performance Benchmarks ``` diff --git a/docs/mig-deployment.md b/docs/mig-deployment.md index bc4793ab1..d2ee3cc5e 100644 --- a/docs/mig-deployment.md +++ b/docs/mig-deployment.md @@ -15,10 +15,10 @@ refer to the [MIG Supported Hardware List](https://docs.nvidia.com/datacenter/te Before you deploy, verify that you have the following: -* A Kubernetes cluster with NVIDIA H100 GPUs +* A Kubernetes cluster with NVIDIA H100 or RTX PRO 6000 GPUs :::{note} - This section showcases MIG support for `NVIDIA H100 80GB HBM3` GPU. The MIG profiles used in the `mig-config.yaml` are specific to this GPU. + This section showcases MIG support for `NVIDIA H100 80GB HBM3` GPU. The MIG profiles used in the `mig-config-h100.yaml` are specific to this GPU. Refer to the [MIG User Guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/) for MIG profiles of other GPU types. ::: @@ -35,9 +35,9 @@ For monitoring deployment progress, refer to [Deploy on Kubernetes with Helm](./ 3. Verify that you have the NGC CLI available on your client computer. You can download the CLI from . -4. Verify that you have Kubernetes v1.34.2 installed and running on Ubuntu 22.04/24.04. 
For more information, see [Kubernetes documentation](https://kubernetes.io/docs/setup/) and [NVIDIA Cloud Native Stack 17.0](https://github.com/NVIDIA/cloud-native-stack/tree/17.0). +4. Verify that you have Kubernetes v1.34.2 installed and running on Ubuntu 22.04/24.04. For more information, see [Kubernetes documentation](https://kubernetes.io/docs/setup/) and [NVIDIA Cloud Native Stack 17.0](https://github.com/NVIDIA/cloud-native-stack/tree/25.12.0). -5. Verify that you have installed Helm 3 or later (Helm v3.20.0 recommended). For installation instructions, see [Helm Installation](https://helm.sh/docs/intro/install). +5. Verify that you have installed Helm 3. To install Helm 3 (and avoid Helm 4), follow the official Helm v3 installation instructions for your platform, for example by using the `get-helm-3` script described in the [Helm documentation](https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3). 6. Verify that you have a default storage class available in the cluster for PVC provisioning. One option is the local path provisioner by Rancher. Refer to the [installation](https://github.com/rancher/local-path-provisioner?tab=readme-ov-file#installation) section of the README in the GitHub repository. @@ -99,7 +99,7 @@ For monitoring deployment progress, refer to [Deploy on Kubernetes with Helm](./ ## Step 2: Apply the MIG configuration -Edit the MIG configuration file [`mig-config.yaml`](../deploy/helm/mig-slicing/mig-config.yaml) to adjust the slicing pattern as needed. +Edit the MIG configuration file [`mig-config-h100.yaml`](../deploy/helm/mig-slicing/mig-config-h100.yaml) to adjust the slicing pattern as needed. The following example enables a custom configuration with mixed MIG slice sizes on the same GPU. @@ -139,7 +139,7 @@ data: Apply the custom MIG configuration configMap to the node and update the ClusterPolicy, by running the following code. 
```bash -kubectl apply -n nvidia-gpu-operator -f mig-slicing/mig-config.yaml +kubectl apply -n nvidia-gpu-operator -f mig-slicing/mig-config-h100.yaml kubectl patch clusterpolicies.nvidia.com/cluster-policy \ --type='json' \ -p='[{"op":"replace", "path":"/spec/migManager/config/name", "value":"custom-mig-config"}]' @@ -151,6 +151,20 @@ Label the node with MIG configuration, by running the following code. kubectl label nodes <node-name> nvidia.com/mig.config=custom-7x1g10-2x1g20-1x3g40-1x7g80 --overwrite ``` +:::{important} +**For NVIDIA RTX6000 Pro Deployments:** + +Use [`mig-config-rtx6000.yaml`](../deploy/helm/mig-slicing/mig-config-rtx6000.yaml) instead: + +```bash +kubectl apply -n nvidia-gpu-operator -f mig-slicing/mig-config-rtx6000.yaml +kubectl patch clusterpolicies.nvidia.com/cluster-policy \ + --type='json' \ + -p='[{"op":"replace", "path":"/spec/migManager/config/name", "value":"custom-mig-config"}]' +kubectl label nodes <node-name> nvidia.com/mig.config=custom-rtx6000-4x1g24-2x1g24-1x2g48-1x4g96 --overwrite +``` +::: + Verify that the MIG configuration is successfully applied, by running the following code. ```bash @@ -174,39 +188,26 @@ You should see output similar to the following. Run the following code to install the RAG Blueprint Helm Chart. ```bash -helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ +helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --set imagePullSecret.password=$NGC_API_KEY \ --set ngcApiSecret.password=$NGC_API_KEY \ - -f mig-slicing/values-mig.yaml + -f mig-slicing/values-mig-h100.yaml ``` :::{important} **For NVIDIA RTX6000 Pro Deployments:** -If you are deploying on NVIDIA RTX6000 Pro GPUs (instead of H100 GPUs), you need to configure the NIM LLM model profile.
The required configuration is already present but commented out in the [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) file. - -Uncomment and modify the following section under `nimOperator.nim-llm.model` in [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml): -```yaml -model: - engine: tensorrt_llm - precision: "fp8" - qosProfile: "throughput" - tensorParallelism: "1" - gpus: - - product: "rtx6000_blackwell_sv" -``` +If you are deploying on NVIDIA RTX6000 Pro GPUs (instead of H100 GPUs), use [`values-mig-rtx6000.yaml`](../deploy/helm/mig-slicing/values-mig-rtx6000.yaml) and [`mig-config-rtx6000.yaml`](../deploy/helm/mig-slicing/mig-config-rtx6000.yaml) which include the RTX6000-specific MIG profiles and NIM LLM model configuration. -Then install using the modified values.yaml along with MIG values: ```sh -helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ +helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --set imagePullSecret.password=$NGC_API_KEY \ --set ngcApiSecret.password=$NGC_API_KEY \ - -f values.yaml \ - -f mig-slicing/values-mig.yaml + -f mig-slicing/values-mig-rtx6000.yaml ``` ::: @@ -235,14 +236,14 @@ You should see output similar to the following. Resource Requested Limit Allocatable Free nvidia.com/mig-1g.10gb (86%) 6.0 (86%) 6.0 7.0 1.0 ├─ milvus-standalone-... 1.0 1.0 -├─ nemoretriever-embedding-ms-... 1.0 1.0 +├─ nemotron-embedding-ms-... 1.0 1.0 ├─ rag-nv-ingest-... 1.0 1.0 ├─ nemoretriever-graphic-elements-v1-... 1.0 1.0 ├─ nemoretriever-page-elements-v3-... 1.0 1.0 └─ nemoretriever-table-structure-v1-... 1.0 1.0 nvidia.com/mig-1g.20gb (100%) 2.0 (100%) 2.0 2.0 0.0 -├─ nemoretriever-ranking-ms-... 1.0 1.0 +├─ nemotron-ranking-ms-... 
1.0 1.0 └─ 1.0 1.0 nvidia.com/mig-3g.40gb (100%) 1.0 (100%) 1.0 1.0 0.0 @@ -303,7 +304,7 @@ GPU 3: NVIDIA H100 80GB HBM3 (UUID: ...) * Ensure you have the correct MIG strategy (`mixed`) configured. * Verify that `nvidia.com/mig.config.state` is `success` before deploying. -* Customize `values-mig.yaml` to specify the correct MIG GPU resource requests for each pod. +* Customize `values-mig-h100.yaml` or `values-mig-rtx6000.yaml` to specify the correct MIG GPU resource requests for each pod. diff --git a/docs/mount-ingestor-volume.md b/docs/mount-ingestor-volume.md index a9bbb43c4..ff776e34d 100644 --- a/docs/mount-ingestor-volume.md +++ b/docs/mount-ingestor-volume.md @@ -4,7 +4,7 @@ --> # Ingestor Server Volume Mounting for NVIDIA RAG Blueprint -You can mount a host directory to access NV-Ingest extraction results directly from the filesystem when you use the [NVIDIA RAG Blueprint](readme.md). Designed for advanced developers who need programmatic access to raw extraction results for custom processing pipelines or external vector database integration. +You can mount a host directory to access extraction results from NeMo Retriever Library directly from the filesystem when you use the [NVIDIA RAG Blueprint](readme.md). Designed for advanced developers who need programmatic access to raw extraction results for custom processing pipelines or external vector database integration. 
## Configuration diff --git a/docs/multi-collection-retrieval.md b/docs/multi-collection-retrieval.md index cd80c337f..07d05be12 100644 --- a/docs/multi-collection-retrieval.md +++ b/docs/multi-collection-retrieval.md @@ -38,10 +38,10 @@ The reranker settings are configured in `deploy/compose/docker-compose-rag-serve export ENABLE_RERANKER=True # Set reranker model (default is already configured) -export APP_RANKING_MODELNAME="nvidia/llama-3.2-nv-rerankqa-1b-v2" +export APP_RANKING_MODELNAME="nvidia/llama-nemotron-rerank-1b-v2" # Reranker service URL (default is already configured) -export APP_RANKING_SERVERURL="nemoretriever-ranking-ms:8000" +export APP_RANKING_SERVERURL="nemotron-ranking-ms:8000" ``` ### For Helm Deployment @@ -54,7 +54,7 @@ envVars: ENABLE_RERANKER: "True" # Reranker model name (default is already configured) - APP_RANKING_MODELNAME: "nvidia/llama-3.2-nv-rerankqa-1b-v2" + APP_RANKING_MODELNAME: "nvidia/llama-nemotron-rerank-1b-v2" # Reranker service URL (default is already configured) APP_RANKING_SERVERURL: "nemoretriever-reranking-ms:8000" diff --git a/docs/nemoretriever-ocr.md b/docs/nemoretriever-ocr.md index d8ef4f3c5..a76a7c113 100644 --- a/docs/nemoretriever-ocr.md +++ b/docs/nemoretriever-ocr.md @@ -11,17 +11,17 @@ This guide explains the OCR (Optical Character Recognition) services available i The NVIDIA RAG Blueprint supports two OCR services: -1. **NeMo Retriever OCR** (Default) - High-performance OCR service offering 2x+ faster performance +1. **NeMo Retriever Library OCR** (Default) - High-performance OCR service offering 2x+ faster performance 2. **Paddle OCR** (Legacy) - General-purpose OCR service maintained for compatibility :::{tip} -**NeMo Retriever OCR is now the default OCR service** and is recommended for all new deployments due to its superior performance and efficiency. +**NeMo Retriever Library OCR is now the default OCR service** and is recommended for all new deployments due to its superior performance and efficiency. 
::: -## NeMo Retriever OCR (Default) +## NeMo Retriever Library OCR (Default) -NeMo Retriever OCR is the default and recommended OCR service for the NVIDIA RAG Blueprint, providing: +NeMo Retriever Library OCR is the default and recommended OCR service for the NVIDIA RAG Blueprint, providing: - **2x+ faster performance** compared to Paddle OCR - Optimized text extraction from documents and images @@ -38,7 +38,7 @@ NeMo Retriever OCR is the default and recommended OCR service for the NVIDIA RAG ### Default Configuration -By default, the NVIDIA RAG Blueprint is configured to use NeMo Retriever OCR with the following settings: +By default, the NVIDIA RAG Blueprint is configured to use NeMo Retriever Library OCR with the following settings: | Variable | Default Value | Description | |----------|---------------|-------------| @@ -49,11 +49,11 @@ By default, the NVIDIA RAG Blueprint is configured to use NeMo Retriever OCR wit ### Hardware Requirements -For detailed hardware requirements and GPU support, refer to the [NeMo Retriever OCR Support Matrix](https://docs.nvidia.com/nim/ingestion/image-ocr/1.2.0/support-matrix.html). +For detailed hardware requirements and GPU support, refer to the [NeMo Retriever Library OCR Support Matrix](https://docs.nvidia.com/nim/ingestion/image-ocr/1.2.0/support-matrix.html). ### Docker Configuration -The NeMo Retriever OCR service is configured in the Docker Compose file with the following key settings: +The NeMo Retriever Library OCR service is configured in the Docker Compose file with the following key settings: - **Image**: `nvcr.io/nim/nvidia/nemoretriever-ocr-v1:1.2.0` - **GPU Memory**: 8192 MB (default) @@ -72,7 +72,7 @@ export OCR_OMP_NUM_THREADS=8 # Set OpenMP threads ## Paddle OCR (Legacy) -Paddle OCR is maintained as a legacy option for compatibility with existing workflows. While still functional, it is recommended to migrate to NeMo Retriever OCR for better performance. 
+Paddle OCR is maintained as a legacy option for compatibility with existing workflows. While still functional, it is recommended to migrate to NeMo Retriever Library OCR for better performance. ### When to Use Paddle OCR @@ -83,8 +83,6 @@ Consider using Paddle OCR if you: ### Hardware Requirements -For detailed hardware requirements, refer to the [Paddle OCR Support Matrix](https://docs.nvidia.com/nim/ingestion/table-extraction/latest/support-matrix.html#supported-hardware). - ### Docker Configuration The Paddle OCR service configuration: @@ -94,7 +92,7 @@ The Paddle OCR service configuration: - **Ports**: 8009 (HTTP), 8010 (gRPC), 8011 (Metrics) :::{note} -**Legacy Service**: Paddle OCR is maintained as a legacy option. For new deployments, we recommend using the default NeMo Retriever OCR service for better performance. +**Legacy Service**: Paddle OCR is maintained as a legacy option. For new deployments, we recommend using the default NeMo Retriever Library OCR service for better performance. ::: @@ -102,9 +100,9 @@ The Paddle OCR service configuration: ### Docker Compose Deployment -#### Using NeMo Retriever OCR (Default) +#### Using NeMo Retriever Library OCR (Default) -NeMo Retriever OCR is deployed by default when you follow the standard deployment guide. No additional configuration is required. +NeMo Retriever Library OCR is deployed by default when you follow the standard deployment guide. No additional configuration is required. 1. **Prerequisites**: Follow the [deployment guide](deploy-docker-self-hosted.md) for standard setup. @@ -114,7 +112,7 @@ NeMo Retriever OCR is deployed by default when you follow the standard deploymen ``` :::{tip} - NeMo Retriever OCR is included in the default profile and will start automatically. + NeMo Retriever Library OCR is included in the default profile and will start automatically. ::: 3. **Verify Service Status**: @@ -136,7 +134,7 @@ If you need to use Paddle OCR instead: export OCR_MODEL_NAME=paddle ``` -3. 
**Stop NeMo Retriever OCR if running**: +3. **Stop NeMo Retriever Library OCR if running**: ```bash USERID=$(id -u) docker compose -f deploy/compose/nims.yaml down nemoretriever-ocr ``` @@ -146,7 +144,7 @@ If you need to use Paddle OCR instead: USERID=$(id -u) docker compose -f deploy/compose/nims.yaml --profile paddle up -d ``` -5. **Restart Ingestor Server and NV-Ingest Runtime**: +5. **Restart Ingestor Server and NeMo Retriever Library Runtime**: ```bash docker compose -f deploy/compose/docker-compose-ingestor-server.yaml up -d ``` @@ -156,9 +154,9 @@ If you need to use Paddle OCR instead: ### NVIDIA-Hosted Deployment -#### Using NeMo Retriever OCR (Default) +#### Using NeMo Retriever Library OCR (Default) -Follow the standard [NVIDIA-hosted deployment guide](deploy-docker-nvidia-hosted.md) - NeMo Retriever OCR is the default configuration. +Follow the standard [NVIDIA-hosted deployment guide](deploy-docker-nvidia-hosted.md) - NeMo Retriever Library OCR is the default configuration. #### Using Paddle OCR with NVIDIA-Hosted Deployment @@ -178,13 +176,13 @@ Follow the standard [NVIDIA-hosted deployment guide](deploy-docker-nvidia-hosted ### Helm Deployment -#### Using NeMo Retriever OCR (Default) +#### Using NeMo Retriever Library OCR (Default) -NeMo Retriever OCR is deployed by default with Helm installations. Follow the standard [Helm Deployment Guide](deploy-helm.md) - no additional OCR configuration is required. +NeMo Retriever Library OCR is deployed by default with Helm installations. Follow the standard [Helm Deployment Guide](deploy-helm.md) - no additional OCR configuration is required. 
#### Using Paddle OCR with Helm -To use Paddle OCR instead of the default NeMo Retriever OCR: +To use Paddle OCR instead of the default NeMo Retriever Library OCR: Modify [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) to override the OCR service image: @@ -216,7 +214,7 @@ For detailed Helm deployment instructions, see [Helm Deployment Guide](deploy-he ### Environment Variables -| Variable | Description | NeMo Retriever Default | Paddle Default | Required | +| Variable | Description | NeMo Retriever Library Default | Paddle Default | Required | |----------|-------------|------------------------|----------------|----------| | `OCR_GRPC_ENDPOINT` | gRPC endpoint for OCR service | `nemoretriever-ocr:8001` | `paddle:8001` | Yes (on-premises) | | `OCR_HTTP_ENDPOINT` | HTTP endpoint for OCR service | `http://nemoretriever-ocr:8000/v1/infer` | `http://paddle:8000/v1/infer` | Yes | @@ -240,16 +238,16 @@ Replace `workstation_ip` with the actual IP address of the machine running the O ## Switching Between OCR Services -### Migrating from Paddle OCR to NeMo Retriever OCR +### Migrating from Paddle OCR to NeMo Retriever Library OCR -To switch to the default NeMo Retriever OCR service: +To switch to the default NeMo Retriever Library OCR service: 1. **Stop Paddle OCR**: ```bash USERID=$(id -u) docker compose -f deploy/compose/nims.yaml down paddle ``` -2. **Configure NeMo Retriever OCR environment variables**: +2. **Configure NeMo Retriever Library OCR environment variables**: ```bash export OCR_GRPC_ENDPOINT=nemoretriever-ocr:8001 export OCR_HTTP_ENDPOINT=http://nemoretriever-ocr:8000/v1/infer @@ -257,7 +255,7 @@ To switch to the default NeMo Retriever OCR service: export OCR_MODEL_NAME=scene_text_ensemble ``` -3. **Start NeMo Retriever OCR**: +3. 
**Start NeMo Retriever Library OCR**: ```bash USERID=$(id -u) docker compose -f deploy/compose/nims.yaml up -d nemoretriever-ocr ``` @@ -267,14 +265,14 @@ To switch to the default NeMo Retriever OCR service: docker compose -f deploy/compose/docker-compose-ingestor-server.yaml up -d ``` -### Migrating from NeMo Retriever OCR to Paddle OCR +### Migrating from NeMo Retriever Library OCR to Paddle OCR Follow the steps in [Switching to Paddle OCR](#switching-to-paddle-ocr) above. ## Performance Comparison -| Feature | NeMo Retriever OCR | Paddle OCR | +| Feature | NeMo Retriever Library OCR | Paddle OCR | |---------|-------------------|------------| | **Performance** | 2x+ faster | Baseline | | **GPU Memory** | 8 GB (default) | 3 GB (default) | @@ -299,13 +297,13 @@ Follow the steps in [Switching to Paddle OCR](#switching-to-paddle-ocr) above. 3. **Performance Issues** - Consider increasing `OCR_CUDA_MEMORY_POOL_MB` - - Adjust `OCR_BATCH_SIZE` for NeMo Retriever OCR + - Adjust `OCR_BATCH_SIZE` for NeMo Retriever Library OCR - Verify GPU has sufficient memory ### Getting Logs ```bash -# NeMo Retriever OCR logs +# NeMo Retriever Library OCR logs docker logs nemoretriever-ocr # Paddle OCR logs diff --git a/docs/nemotron-parse-extraction.md b/docs/nemotron-parse-extraction.md index a23dca7f4..0e2fc0b11 100644 --- a/docs/nemotron-parse-extraction.md +++ b/docs/nemotron-parse-extraction.md @@ -62,7 +62,7 @@ When using NVIDIA hosted endpoints, you may encounter rate limiting with larger ## Using Helm -To enable PDF extraction with Nemotron Parse using Helm, you need to enable the Nemotron Parse service and configure the ingestor-server to use it. +To enable PDF extraction with Nemotron Parse using Helm, enable the Nemotron Parse service and configure the ingestor-server to use it. ### Prerequisites - Ensure you have sufficient GPU resources. Nemotron Parse requires a dedicated GPU. 
@@ -71,7 +71,7 @@ To enable PDF extraction with Nemotron Parse using Helm, you need to enable the To deploy with Nemotron Parse enabled: -Modify [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) to enable Nemotron Parse: +Modify [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) to enable Nemotron Parse and configure the ingestor-server: ```yaml # Enable Nemotron Parse NIM @@ -93,9 +93,136 @@ For detailed HELM deployment instructions, see [Helm Deployment Guide](deploy-he :::{note} **Key Configuration Changes:** - `nv-ingest.nimOperator.nemotron_parse.enabled=true` - Enables Nemotron Parse NIM -- `ingestor-server.envVars.APP_NVINGEST_PDFEXTRACTMETHOD="nemotron_parse"` - Configures ingestor to use Nemotron Parse +- `ingestor-server.envVars.APP_NVINGEST_PDFEXTRACTMETHOD="nemotron_parse"` - Configures ingestor to use Nemotron Parse for PDF extraction ::: +## Experimental: Nemotron-parse-only extraction + +:::{note} +The steps in this section describe a nemotron-parse-only pipeline. For production use, the default pipeline (Nemotron Parse with page-elements and table-structure NIMs) is recommended for better accuracy. +::: + +The **default** Nemotron Parse pipeline uses the **page-elements** and **table-structure** NIMs together with the Nemotron Parse NIM in the extraction pipeline. This combination provides better accuracy for PDF and table extraction. +To **experiment** with a nemotron-parse-only extraction pipeline (using only the Nemotron Parse NIM, without OCR, page-elements, graphic-elements, or table-structure NIMs), use the following steps. + +### Key configuration + +- **PDF extraction method** — Set `APP_NVINGEST_PDFEXTRACTMETHOD` to `nemotron_parse` so the ingestor uses Nemotron Parse for PDF text extraction. +- **Table extraction method** — Set `APP_NVINGEST_EXTRACTTABLESMETHOD` to `nemotron_parse` so the ingestor uses Nemotron Parse for table extraction instead of the default YOLOX-based table NIMs. 
This is required for a nemotron-parse-only pipeline. +- **nv-ingest health check** — Set `COMPONENTS_TO_READY_CHECK` to an empty string (`""`) in the **nv-ingest** service environment. By default, nv-ingest readiness waits for other ingest NIMs. With only Nemotron Parse running, the readiness probe would otherwise never pass. Emptying this value allows nv-ingest to become ready when only Nemotron Parse is available. + +### Using Docker Compose (nemotron-parse-only) + +#### On-prem models + +1. **Prerequisites**: Follow the [deployment guide](deploy-docker-self-hosted.md) up to and including the step labelled "Start all required NIMs." + +2. Start only the Nemotron Parse service (and any other non-ingest services your setup needs): + ```bash + USERID=$(id -u) docker compose --profile rag --profile nemotron-parse -f deploy/compose/nims.yaml up -d + ``` + You can skip the OCR, page-elements, graphic-elements, or table-structure NIMs if you want a nemotron-parse-only pipeline. + +3. Configure the ingestor-server and nv-ingest for nemotron-parse-only. Set these environment variables: + + **Ingestor-server** (ingestor-server environment): + ```bash + export APP_NVINGEST_PDFEXTRACTMETHOD=nemotron_parse + export APP_NVINGEST_EXTRACTTABLESMETHOD=nemotron_parse + ``` + + **nv-ingest** (nv-ingest service environment, e.g. in the compose file where nv-ingest runs): + ```bash + export COMPONENTS_TO_READY_CHECK="" + ``` + This ensures the nv-ingest readiness probe passes when other ingest NIMs are not running. + +4. Deploy the ingestion-server and rag-server containers following the remaining steps in the deployment guide. + +5. Ingest PDFs using the [ingestion API usage notebook](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/ingestion_api_usage.ipynb). + +#### NVIDIA hosted API endpoints + +1. 
**Prerequisites**: Follow the [deployment guide](deploy-docker-nvidia-hosted.md) up to and including the step labelled "Start the vector db containers from the repo root." + +2. Export variables for the Nemotron Parse API: + ```bash + export NEMOTRON_PARSE_HTTP_ENDPOINT=https://integrate.api.nvidia.com/v1/chat/completions + export NEMOTRON_PARSE_MODEL_NAME=nvidia/nemotron-parse + export NEMOTRON_PARSE_INFER_PROTOCOL=http + ``` + +3. Configure the ingestor-server and nv-ingest for nemotron-parse-only: + + **Ingestor-server**: + ```bash + export APP_NVINGEST_PDFEXTRACTMETHOD=nemotron_parse + export APP_NVINGEST_EXTRACTTABLESMETHOD=nemotron_parse + ``` + + **nv-ingest** (so readiness passes without other NIMs): + ```bash + export COMPONENTS_TO_READY_CHECK="" + ``` + +4. Deploy the ingestion-server and rag-server containers following the remaining steps in the deployment guide. + +5. Ingest PDFs using the [ingestion API usage notebook](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/ingestion_api_usage.ipynb). + +:::{note} +When using NVIDIA hosted endpoints, you may encounter rate limiting with larger file ingestions (>10 files). +::: + +### Using Helm (nemotron-parse-only) + +To run only Nemotron Parse for PDF and table extraction with Helm: + +1. **Prerequisites**: Ensure you have sufficient GPU resources. Nemotron Parse requires a dedicated GPU. + +2. 
Edit [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml): + + - **Enable Nemotron Parse** and **disable the other ingest NIMs** under `nv-ingest.nimOperator`: + + ```yaml + nv-ingest: + nimOperator: + nemotron_parse: + enabled: true + nemoretriever_ocr_v1: + enabled: false + graphic_elements: + enabled: false + page_elements: + enabled: false + table_structure: + enabled: false + envVars: + COMPONENTS_TO_READY_CHECK: "" + ``` + + - **Configure the ingestor-server** to use Nemotron Parse for both PDF and table extraction: + + ```yaml + ingestor-server: + envVars: + APP_NVINGEST_PDFEXTRACTMETHOD: "nemotron_parse" + APP_NVINGEST_EXTRACTTABLESMETHOD: "nemotron_parse" + ``` + +3. Apply the changes as described in [Change a Deployment](deploy-helm.md#change-a-deployment). + +4. For full Helm deployment steps, see the [Helm Deployment Guide](deploy-helm.md). + +**Summary of nemotron-parse-only Helm settings:** + +| Setting | Purpose | +|---------|---------| +| `nv-ingest.nimOperator.nemotron_parse.enabled: true` | Enable the Nemotron Parse NIM. | +| `nv-ingest.nimOperator.<nim_name>.enabled: false` | Disable OCR, page-elements, graphic-elements, and table-structure NIMs. | +| `nv-ingest.envVars.COMPONENTS_TO_READY_CHECK: ""` | nv-ingest health check: readiness passes without other NIMs. | +| `ingestor-server.envVars.APP_NVINGEST_PDFEXTRACTMETHOD: "nemotron_parse"` | Use Nemotron Parse for PDF extraction. | +| `ingestor-server.envVars.APP_NVINGEST_EXTRACTTABLESMETHOD: "nemotron_parse"` | Use Nemotron Parse for table extraction. | + ## Limitations and Requirements When using Nemotron Parse for PDF extraction, consider the following: @@ -105,7 +232,7 @@ When using Nemotron Parse for PDF extraction, consider the following: - The extraction quality may vary depending on the PDF structure and content. - Nemotron Parse is not supported on NVIDIA B200 GPUs or RTX Pro 6000 GPUs. 
-For detailed information about hardware requirements and supported GPUs for all NeMo Retriever extraction NIMs, refer to the [Nemotron Parse Support Matrix](https://docs.nvidia.com/nim/vision-language-models/latest/support-matrix.html#nemotron-parse). +For detailed information about hardware requirements and supported GPUs for extraction NIMs used by NeMo Retriever Library, refer to the [Nemotron Parse Support Matrix](https://docs.nvidia.com/nim/vision-language-models/latest/support-matrix.html#nemotron-parse). ## Available PDF Extraction Methods @@ -115,6 +242,8 @@ The `APP_NVINGEST_PDFEXTRACTMETHOD` environment variable supports the following - `pdfium`: Uses the default PDFium-based extraction - `None`: Uses the default extraction method +**Table extraction method:** The `APP_NVINGEST_EXTRACTTABLESMETHOD` environment variable controls how tables are extracted. Set it to `nemotron_parse` to use Nemotron Parse for table extraction (recommended for a nemotron-parse-only pipeline). The default is `yolox`, which uses the YOLOX-based table NIMs. + :::{note} The Nemotron Parse service requires GPU resources and must run on a dedicated GPU. Make sure you have sufficient GPU resources available before enabling this feature. ::: diff --git a/docs/nemotron3-super-deployment.md b/docs/nemotron3-super-deployment.md new file mode 100644 index 000000000..8b4295945 --- /dev/null +++ b/docs/nemotron3-super-deployment.md @@ -0,0 +1,180 @@ +# Using Nemotron-3-Super-120B-A12B LLM NIM + +[Nemotron-3-Super-120B-A12B](https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b/modelcard) is a large language model (LLM) trained by NVIDIA, designed to deliver strong agentic, reasoning, and conversational capabilities. It is optimized for collaborative agents and high-volume workloads such as IT ticket automation. This LLM can considerably improve the accuracy of the RAG pipeline, especially with reasoning enabled. 
([Model card](https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b/modelcard)) + +We recommend using the model in low-effort reasoning mode with a reasoning budget of 256 to balance accuracy and performance. You can switch to non-reasoning mode for maximum performance or use reasoning mode for best accuracy. + +## Hardware requirements + +For Docker and Kubernetes deployment, see the following: + +- **Docker (local NIM):** [Hardware Requirements (Docker)](support-matrix.md#hardware-requirements-docker) +- **Kubernetes (Helm):** [Hardware Requirements (Kubernetes)](support-matrix.md#hardware-requirements-kubernetes) + +For [self-hosted local NIM](deploy-docker-self-hosted.md) deployment with `nemotron-3-super-120b-a12b`, you need one of the following: + +- 3 x H100 +- 3 x B200 +- 3 x RTX PRO 6000 + +### Hardware Requirements (Kubernetes) + +To deploy with [Helm](deploy-helm.md) using `nemotron-3-super-120b-a12b`, you need one of the following: + +- 9 x H100-80GB +- 9 x B200 +- 9 x RTX PRO 6000 + +--- + +## Start services using NVIDIA-hosted models + +No local GPU is needed for the LLM. The file `deploy/compose/nemotron3-super-cloud.env` sets all NVIDIA-hosted (cloud) endpoints and the `nemotron-3-super-120b-a12b` model. + +1. [Set your API key](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/docs/api-key.md) and prompt config, then source the env files: + +```bash +export NGC_API_KEY= +source deploy/compose/.env +source deploy/compose/nemotron3-super-cloud.env +export PROMPT_CONFIG_FILE=$(pwd)/deploy/compose/nemotron3-super-prompt.yaml +``` + +2. Follow [Start services using NVIDIA-hosted models](deploy-docker-nvidia-hosted.md#start-services-using-nvidia-hosted-models) to start the vectorstore, rag-server, and ingestor-server. + +--- + +## Start services using self-hosted on-premises models + +1. 
Update `nims.yaml` + + Edit `deploy/compose/nims.yaml` and change the `nim-llm` service image and GPU allocation: + + ```yaml + nim-llm: + image: nvcr.io/nim/nvidia/nemotron-3-super-120b-a12b:1.8.0 + ... + user: "0" + environment: + NGC_API_KEY: ${NGC_API_KEY} + NIM_MAX_MODEL_LEN: "32768" # required for TP2 profile + NIM_KVCACHE_PERCENT: "0.9" + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['1','2'] # 2 GPUs for FP8 TP2 + capabilities: [gpu] + ``` + + > Note: To deploy TP2 profiles you need to limit NIM_MAX_MODEL_LEN to 32768 + + To confirm that a TP2 profile is available for your hardware, run: + + ```bash + docker run -ti --rm --gpus all nvcr.io/nim/nvidia/nemotron-3-super-120b-a12b:1.8.0 list-model-profiles + ``` + + Check the [model page](https://build.nvidia.com/nvidia/nemotron-3-super-120b-a12b/modelcard) for more details. + + > Note: For RTX 6000 Pro GPUs, additional NIM environment variables are required — see [RTX 6000 Pro](#rtx-6000-pro) below. + +2. Set nemotron-3-super specific environment variables. + + Ensure the section **`Endpoints for using cloud NIMs`** in `deploy/compose/.env` is **commented** (so on-prem endpoints are used). + + ```bash + source deploy/compose/.env + source deploy/compose/nemotron3-super.env + export PROMPT_CONFIG_FILE=$(pwd)/deploy/compose/nemotron3-super-prompt.yaml + export LLM_MAX_TOKENS=16256 + ``` + + Follow [Start services using self-hosted on-premises models](deploy-docker-self-hosted.md#start-services-using-self-hosted-on-premises-models) to start the vectorstore, rag-server, NIMs, and ingestor-server. + +**RTX 6000 Pro** + +> Note: To deploy TP2 profiles on RTX PRO 6000 Blackwell Server Edition, run the following commands. You don't need to go through these steps if you are using TP4 or TP8 profile. + +1. Edit `/etc/default/grub` and set: + + ```text + GRUB_CMDLINE_LINUX_DEFAULT="quiet splash iommu=pt" + ``` + +2. Run: + + ```bash + sudo update-grub2 + sudo reboot + ``` + +3. 
In `nims.yaml`, add under the `nim-llm` `environment:` block: + + ```yaml + environment: + # In addition to the variables already set in step 1 + NCCL_P2P_DISABLE: "1" + ``` + +--- + +## Helm deployment (`nemotron-3-super-120b-a12b`) + +From the repository root, run: + +```bash +helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set imagePullSecret.password=$NGC_API_KEY \ + --set ngcApiSecret.password=$NGC_API_KEY \ + -f deploy/helm/nvidia-blueprint-rag/values.yaml \ + -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml +``` + +The prompt file `deploy/compose/nemotron3-super-prompt.yaml` is tuned for `nemotron-3-super-120b-a12b`. To customize it, see [Prompt customization in Helm chart](prompt-customization.md#prompt-customization-in-helm-chart). + +**RTX 6000 Pro** + +> Note: To deploy TP2 profiles on RTX PRO 6000 Blackwell Server Edition, run the following commands. You don't need to go through these steps if you are using a TP4 or TP8 profile. + +1. Edit `/etc/default/grub` and set: + + ```text + GRUB_CMDLINE_LINUX_DEFAULT="quiet splash iommu=pt" + ``` + +2. Run: + + ```bash + sudo update-grub2 + sudo reboot + ``` + +3. From the repository root, run: + + ```bash + helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set imagePullSecret.password=$NGC_API_KEY \ + --set ngcApiSecret.password=$NGC_API_KEY \ + -f deploy/helm/nvidia-blueprint-rag/values.yaml \ + -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-values.yaml \ + -f deploy/helm/nvidia-blueprint-rag/nemotron3-super-rtx6000-values.yaml + ``` + +--- + +## Reasoning and non-reasoning mode + +To disable reasoning mode, set the following: + +```bash +export LLM_ENABLE_THINKING=false +export LLM_REASONING_BUDGET=0 +``` + +For other options (e.g. 
full reasoning budget), see [Enable reasoning for Nemotron 3 models](enable-nemotron-thinking.md). diff --git a/docs/notebooks.md b/docs/notebooks.md index a88952f79..35b2b176b 100644 --- a/docs/notebooks.md +++ b/docs/notebooks.md @@ -101,7 +101,9 @@ Use the following notebooks to learn comprehensive Python client usage, metadata - [rag_library_usage.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_library_usage.ipynb) – Demonstrates native usage of the NVIDIA RAG Python client, including environment setup, document ingestion, collection management, and querying. This notebook provides end-to-end API usage examples for interacting directly with the RAG system from Python, covering both ingestion and retrieval workflows. -- [rag_library_lite_usage.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_library_lite_usage.ipynb) – Demonstrates containerless deployment of the NVIDIA RAG Python package in lite mode. Uses Milvus Lite (embedded vector database) and NV-Ingest subprocess mode for a simplified setup without Docker containers. Leverages NVIDIA cloud APIs for embeddings, ranking, and LLM inference. **Note**: This mode does not support image/table/chart citations or document summarization. +- [rag_library_lite_usage.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_library_lite_usage.ipynb) – Demonstrates containerless deployment of the NVIDIA RAG Python package in lite mode. Uses Milvus Lite (embedded vector database) and NeMo Retriever Library subprocess mode for a simplified setup without Docker containers. Leverages NVIDIA cloud APIs for embeddings, ranking, and LLM inference. **Note**: This mode does not support image/table/chart citations or document summarization. + +- [langchain_nvidia_retriever.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/langchain_nvidia_retriever.ipynb) – Showcases **LangChain integration** with the NVIDIA RAG Blueprint. 
Run [ingestion_api_usage.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/ingestion_api_usage.ipynb) first to ingest documents, then use `NVIDIARAGRetriever` for retrieval (sync/async), custom parameters, error handling, and optional RAG chaining with `ChatNVIDIA`. @@ -122,55 +124,7 @@ Use the following notebooks to learn how to how to extend the system with custom Use the following notebook for cloud deployment scenarios. -- [launchable.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/launchable.ipynb) – A deployment-ready notebook intended to run in a [Brev environment](https://console.brev.dev/environment/new). To learn more about Brev, refer to [Brev](https://docs.nvidia.com/brev/latest/about-brev.html). Follow the instructions for running Jupyter notebooks in a cloud-based environment based on the hardware requirements specified in the launchable. - - - -## Set Up the Notebook Environment - -To run a notebook, use the following procedure with [uv](https://docs.astral.sh/uv/) - a fast Python package manager. - -> **Note**: Python version **3.11 or higher** is required. - -1. Install uv (if not already installed): - - ```bash - curl -LsSf https://astral.sh/uv/0.8.12/install.sh | sh - ``` - -2. Create and activate a virtual environment: - - ```bash - uv venv --python=python3.12 - source .venv/bin/activate - ``` - -3. Install JupyterLab: - - ```bash - uv pip install jupyterlab - ``` - -4. Start JupyterLab: - - ```bash - jupyter lab --allow-root --ip=0.0.0.0 --NotebookApp.token='' --port=8889 --no-browser - ``` - -### Set-up Notes -- Ensure that API keys and credentials are correctly set up before you run a notebook. -- Modify endpoints or request parameters as necessary to match your specific use case. -- For the custom VDB operator notebook, ensure that Docker is available for running OpenSearch services. 
- - - -## Run a Notebook - -After you set up your notebook environment, to run a notebook, use the following procedure. - -1. Access JupyterLab by opening a browser and navigating to `http://:8889`. -2. Navigate to the notebook and run the cells sequentially. - +- [launchable.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/launchable.ipynb) – A deployment-ready notebook intended to run in a [Brev environment](https://console.brev.dev/environment/new). To learn more about Brev, refer to [Brev](https://developer.nvidia.com/brev). Follow the instructions for running Jupyter notebooks in a cloud-based environment based on the hardware requirements specified in the launchable. ## Related Topics diff --git a/docs/nv-ingest-standalone.md b/docs/nv-ingest-standalone.md index f09a37970..14319ad94 100644 --- a/docs/nv-ingest-standalone.md +++ b/docs/nv-ingest-standalone.md @@ -3,19 +3,19 @@ SPDX-License-Identifier: Apache-2.0 --> -# Deploy NV-Ingest Standalone for NVIDIA RAG Blueprint +# Deploy NeMo Retriever Library Standalone for NVIDIA RAG Blueprint -This guide explains how to deploy and use NV-Ingest as a standalone service for [NVIDIA RAG Blueprint](readme.md) without deploying the full ingestor server. This is useful when you want to ingest documents directly using Python scripts. +This guide explains how to deploy and use NeMo Retriever Library as a standalone service for [NVIDIA RAG Blueprint](readme.md) without deploying the full ingestor server. This is useful when you want to ingest documents directly using Python scripts. 
For more details and advanced usage, refer to: -- [NVIDIA/nv-ingest repository](https://github.com/NVIDIA/nv-ingest) -- [Official NV-Ingest Quickstart Guide](https://github.com/NVIDIA/nv-ingest/blob/main/docs/docs/extraction/quickstart-guide.md) +- [NVIDIA/NeMo-Retriever Library repository](https://github.com/NVIDIA/NeMo-Retriever) +- [Official NeMo Retriever Library Quickstart Guide](https://docs.nvidia.com/nemo/retriever/) ## Limitations -When using NV-Ingest in standalone mode, consider the following limitations: +When using NeMo Retriever Library in standalone mode, consider the following limitations: -1. **Citations Disabled**: The RAG server's citation feature will be disabled for documents ingested through standalone NV-Ingest. This is because the citation metadata requires additional processing that is handled by the full ingestor server. +1. **Citations Disabled**: The RAG server's citation feature will be disabled for documents ingested through standalone NeMo Retriever Library. This is because the citation metadata requires additional processing that is handled by the full ingestor server. 2. **No Web UI**: The standalone deployment does not include the web-based upload interface. All document ingestion must be done through Python scripts. 
@@ -92,7 +92,7 @@ COLLECTION_NAME = "multimodal_data_nvingest" MILVUS_URI = "http://localhost:19530" MINIO_ENDPOINT = "localhost:9010" -# Server Mode (Create NV-Ingest client) +# Server Mode (Create NeMo Retriever Library client) client = NvIngestClient( message_client_hostname="localhost", message_client_port=7670 @@ -118,10 +118,10 @@ ingestor = ingestor.split( ) ingestor = ingestor.embed( - # For self-hosted: "http://nemoretriever-embedding-ms:8000/v1" + # For self-hosted: "http://nemotron-embedding-ms:8000/v1" # For cloud (NVIDIA-hosted): "https://integrate.api.nvidia.com/v1" - endpoint_url="http://nemoretriever-embedding-ms:8000/v1", - model_name="nvidia/llama-3.2-nv-embedqa-1b-v2" + endpoint_url="http://nemotron-embedding-ms:8000/v1", + model_name="nvidia/llama-nemotron-embed-1b-v2" ) ingestor = ingestor.vdb_upload( diff --git a/docs/observability.md b/docs/observability.md index 0c4bb2665..587c6a70e 100644 --- a/docs/observability.md +++ b/docs/observability.md @@ -45,13 +45,13 @@ Use the following procedure to enable observability with Docker. After tracing is enabled and the system is running, you can **view the traces** in **Zipkin** by opening: -

- -

+```{image} assets/zipkin_ui.png +:width: 750px +:align: center +``` Open the Zipkin UI at: **http://localhost:9411** - ## View Metrics in Grafana As part of the tracing, the RAG service also exports metrics like API request counts, LLM prompt and completion token count and words per chunk. @@ -104,11 +104,10 @@ After tracing is enabled and running, you can view inputs and outputs of differe 3. Similarly, you can view inputs and outputs for sub stages within the workflows by clicking on a substage and finding the `traceloop.entity.input` and `traceloop.entity.output` rows. -

- -

- - +```{image} assets/zipkin_ui_labelled.png +:width: 750px +:align: center +``` ## Enable Observability with Helm diff --git a/docs/perf-benchmarks.md b/docs/perf-benchmarks.md new file mode 100644 index 000000000..7029b68bc --- /dev/null +++ b/docs/perf-benchmarks.md @@ -0,0 +1,232 @@ + + +# RAG Performance Measurement Methodology + +[GenAI Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf), NVIDIA’s open-source benchmarking tool, evaluates end-to-end RAG pipeline performance under realistic load conditions. By testing across varying levels of concurrency, it offers a consistent and reproducible way to compare different RAG deployment configurations. + +## Key Terms + +| Term | Description | +|------|-------------| +| **Total Requests** | The total number of questions issued to the RAG server in a single benchmark run. Controls the size of the workload and is kept constant across configurations for fair comparison. | +| **Concurrency** | The number of simultaneously active worker threads sending requests to the server. A higher concurrency simulates a heavier multi-user load. | +| **N_Times** | The number of measured benchmark iterations performed after warm-up. Multiple iterations improve statistical stability of the reported metrics. In all experiments reported in this document, `N_Times` is set to 3. | +| **Input Sequence Length (ISL)** | The number of tokens in the input prompt sent to the RAG server. | +| **Output Sequence Length (OSL)** | The number of tokens in the RAG server's generated response. | +| **TTFT (Time to First Token)** | The elapsed time from when a request is submitted until the first output token is returned. A key indicator of perceived responsiveness in streaming deployments. | +| **Inter-Token Latency (ITL)** | Defined as (E2E Latency - TTFT) / (OSL - 1), where OSL is the number of output tokens generated per request, averaged across all requests in the benchmark run. 
| +| **KV Cache** | A memory buffer on the GPU (HBM) storing the key-value attention states computed for all tokens in an active request. KV cache size grows with sequence length, number of model layers, and hidden dimension. When aggregate KV cache across concurrent requests saturates HBM, new requests queue rather than execute, driving up TTFT. | +| **HBM (High Bandwidth Memory)** | The GPU's on-package memory (e.g., 80 GB on H100). | +| **Prefill** | The stage in which the model processes the full input prompt simultaneously to construct the KV cache. | +| **Decode** | The autoregressive phase where output tokens are generated one at a time. | +| **Batch / Effective Batch Size** | The set of requests processed simultaneously in a single GPU forward pass during the decode phase. At each decode step, the GPU computes attention over the accumulated KV caches of all requests in the active batch and generates one new token per request. A larger batch means more requests share the same forward pass, increasing per-token contention for GPU memory bandwidth and raising ITL. The effective batch size at any moment is constrained by available HBM: once the aggregate KV cache of active requests saturates HBM, additional requests queue rather than enter the batch. Configurations with a smaller per-request KV cache footprint (smaller model, shorter context) sustain a larger effective batch size under the same HBM budget. | +| **Reasoning Chain / Chain-of-Thought** | An extended internal monologue generated by the model before producing its final answer, used to decompose complex questions into intermediate reasoning steps. For Llama-3.3-Nemotron-Super-49B, reasoning is activated by setting the system prompt to "detailed thinking on" and suppressed by "detailed thinking off" — no model weight change occurs between modes. 
When active, reasoning tokens are generated autoregressively in the same decode phase as the final answer and occupy KV cache slots for the full duration of the request, increasing TTFT and ITL relative to reasoning-off. | + +## Benchmarking Modes + +The benchmark supports two distinct modes depending on the evaluation objective. + +### Mode 1 — Synthetic Sequence-Length Benchmarking + +In this mode, the benchmarking workload is defined by a target input sequence length and a target output sequence length rather than by real questions. Synthetic queries are programmatically generated to match the specified token lengths, enabling precise control over the load profile and allowing users to isolate the performance impact of sequence length independently of question content. To support retrieval in this mode, a Wikipedia dataset of 50,000 records is pre-ingested into the Vector Database, providing a sufficiently large and diverse document corpus for the retrieval stage to operate under realistic conditions. + +### Mode 2 — Dataset-Driven Benchmarking + +A curated set of domain-specific questions serves as the request pool. To prevent unbounded generation from obscuring true system throughput, the maximum output token length is capped at 32,000 tokens, ensuring responses remain within a well-defined generation budget and that results are directly comparable across runs. 
+ +| Dataset | Number of Questions | Source Documents | Size | QA Characteristics | +|---------|--------------------:|------------------|------|-------------------| +| [RagBattlePacket](https://www.eyelevel.ai/post/most-accurate-rag) | 92 | Deloitte public tax PDFs | 1,146 pages | 92 questions across text, tabular, and graphical categories; visually dense corpus with rich tables and figures requiring cross-modal understanding | +| [KG-RAG](https://github.com/docugami/KG-RAG-datasets/tree/main/sec-10-q/data/v1) | 195 | SEC 10-Q PDFs + KG triples | 1,037 pages | Entity-centric factual QA over structured financial filings; questions target specific named entities and numerical facts with minimal visual content | +| [HotPotQA](https://huggingface.co/datasets/hotpotqa/hotpot_qa) | 979 | Wikipedia paragraphs | ~113K QA pairs; 2,673 source documents | Multi-hop reasoning requiring the model to chain facts across multiple documents (bridge and comparison); plain text only with no tables or figures | +| [BO767](https://digitalcorpora.org/) | 487 | 767 PDFs | 54,730 pages | Varied, heterogeneous content across a large-scale mixed corpus of forensic and operational documents; high proportion of image and structured content alongside text | + +## How It Works +The following sections describe how benchmarking works. +### Request Pool Construction + +Depending on the selected mode, the request pool is constructed differently: + +- **Synthetic mode:** Synthetic prompts are generated with ISL and OSL set to 128. +- **Dataset mode:** Questions are drawn sequentially from the curated benchmark dataset in a round-robin fashion. Once all questions in the dataset have been issued, the cycle restarts from the beginning, continuing until the total number of requests defined by `total_requests` is reached. This ensures uniform dataset coverage regardless of the configured workload size. 
+ +In both modes, the `total_requests` parameter guarantees that every configuration is evaluated against an identical, fixed-size workload, enabling fair and reproducible comparisons across deployment variants. + +### Concurrency Sweep + +GenAI Perf spawns *N* worker threads, where *N* is driven by a concurrency parameter. The blueprint sweeps across the following concurrency levels: **1, 10, 25, 50, 75, 100, 125** — allowing users to observe how the system scales and identify the point at which latency or throughput begins to degrade. + +### Sequential Request Dispatch + +Each thread draws questions from the pool one at a time in sequence, taking the next request only after a response to the current one has been received. This models realistic per-user session behavior and avoids artificially inflating throughput through intra-thread batching. + +### Warm-Up and Measured Runs + +Before recording any metrics, GenAI Perf executes an initial warm-up run to bring the RAG servers to a steady operational state, eliminating cold-start artifacts. The benchmark is then repeated for a configurable number of iterations (`N_Times`), ensuring that the collected statistics are stable and reproducible. + +### Performance Result Collection + +For every request across all threads and iterations, GenAI Perf records timing and outcome data. These are aggregated into a performance result store, from which key metrics — including Time-to-First-Token and Inter-Token Latency — are computed and reported per concurrency level. + +## Configuration and Performance Results +The following sections provide further information about configuration and performance results. 
+### Configuration and Setup + +The following deployment configurations are evaluated: + +- **LLM-49B** — [Llama-3.3-Nemotron-Super-49B](https://build.nvidia.com/nvidia/llama-3_3-nemotron-super-49b-v1_5/modelcard): A key feature is the reasoning toggle — setting the system prompt to "detailed thinking on" causes the model to generate an internal chain-of-thought before the final answer; "detailed thinking off" produces a direct response. This is a system-prompt switch — no weight change occurs between modes. +- **VLM nano** — [Nemotron Nano VL](https://build.nvidia.com/nvidia/nemotron-nano-12b-v2-vl/modelcard): Its smaller parameter count means significantly lower KV cache memory consumption per request compared to the 49B model. +- **Ingestion setup:** + - Default ingestion: The default ingestion that RAG 2.4.0 uses. + - Default ingestion with VLM image captioning enabled: Details in [Image Captioning Support](image_captioning.md). VLM is enabled during ingestion to extract image and structured content from documents, and also enabled at query time to process retrieved image and structured chunks via the multimodal pipeline. + +| # | LLM Model | Embedding Model | Reasoning On/Off | Ingestion Method | +|---|-----------|----------------|-------------------|-----------------| +| 1 | LLM-49B | Default Embedding | On | Default Ingestion | +| 2 | LLM-49B | Default Embedding | Off | Default Ingestion | +| 3 | VLM nano | Default Embedding | On | Default Ingestion with VLM image captioning enabled | +| 4 | VLM nano | Default Embedding | Off | Default Ingestion with VLM image captioning enabled | + +### Impact Factors + +TTFT (Time to First Token) at high concurrency is primarily determined by KV cache memory pressure. Each active request occupies HBM proportional to sequence length × model layers × hidden dimension. When aggregate KV cache across concurrent requests saturates HBM, incoming requests queue, driving up p95 TTFT. 
+ +Factors governing TTFT: + +- **Model size:** A larger model (49B) has more layers and a wider hidden dimension, consuming more HBM per request than a smaller model (12B) at the same input length. +- **Reasoning chain:** When reasoning is enabled, the model generates a full chain-of-thought before the first answer token. The caller's perceived TTFT includes the full thinking chain duration, and the extended request lifetime occupies KV cache slots for longer, accelerating HBM saturation under concurrency. +- **Image processing pipeline (VLM configurations only):** When VLM inference is enabled and retrieved documents contain chunks with `content_metadata.type = "image"` or `"structured"`, the pipeline fetches thumbnail images from MinIO object storage, encodes them to base64 PNG, and injects them into the prompt alongside text before generation begins. This pre-generation overhead adds directly to TTFT per request, independent of KV cache pressure. + +**ITL (Inter-Token Latency) measures batch contention during the decode phase.** At each decode step, the GPU computes attention over all active requests' accumulated KV caches simultaneously. The more concurrent requests sharing a decode step, the longer each individual token waits — resulting in higher ITL. + +Factors governing ITL: + +- **Effective batch size:** Configurations that fit more concurrent requests into HBM simultaneously (small model + short outputs) produce higher ITL due to greater per-token contention. +- **Output length per request:** Longer outputs per request (reasoning chains) reduce how many requests can coexist in HBM at once, lowering batch contention and ITL. + +### Dataset Mode Results + +The purpose of this mode is to evaluate RAG performance across LLM and VLM configurations, with reasoning both enabled and disabled. 
The following Helm chart configuration is applied for retrieval: + +| # | Configuration | Description | Value | +|---|--------------|-------------|-------| +| 1 | LLM/VLM #GPUs | Number of GPUs allocated to LLM/VLM | 2 | +| 2 | Reranker / Embedding / VectorDB #GPU | Number of GPUs allocated to each service | 1 | +| 3 | Citation | Whether citation source should be returned | Off | +| 4 | VDB K | Number of records taken from Vector DB | 100 | +| 5 | Reranker K | Top number of records returned after reranking | 10 | +| 6 | total_requests | Total requests sent to RAG server per concurrency | MAX(100, 5 × Concurrency) | + +#### KG-RAG + +![KG-RAG — H100 Performance](assets/perf-benchmarks/kgrag_h100_performance.png) + +- TTFT is highest (~210s p95) for the LLM-Reasoning-On configuration due to the large per-request KV cache of the 49B model and extended request lifetimes from reasoning-chain generation. +- TTFT is lowest for VLM-Reasoning-Off, consistent with the VLM nano model's smaller KV cache on a text-dominant structured corpus. +- Both LLM-Reasoning-Off and VLM-Reasoning-On configurations show intermediate TTFT values. +- ITL is highest for VLM-Reasoning-Off (~260ms) because the small VLM nano KV cache allows for a high number of concurrent requests, maximizing batch contention during each decode step. +- VLM-Reasoning-On records significantly lower ITL than VLM-Reasoning-Off — approximately 20% of the reasoning-off value — despite using the same 12B model. When reasoning is enabled, the extended chain-of-thought output occupies KV cache slots for a much longer duration per request, substantially reducing the number of requests the scheduler can hold in the active batch simultaneously. This ITL reduction is proportionally similar to the LLM-Reasoning-On vs. 
LLM-Reasoning-Off drop, but far larger in absolute milliseconds — because VLM-Reasoning-Off reaches ~260ms due to its large active batch, so shrinking that batch via reasoning has much more room to reduce contention than on the LLM side, where the batch was already small. +- ITL is lowest for LLM-Reasoning-On (< 20ms, near-flat). The 49B model with its long reasoning-chain outputs effectively reduces the concurrent batch size, thereby limiting per-token contention. +- ITL for LLM-Reasoning-Off is intermediate (above LLM-Reasoning-On). This is because the same large 49B model with short outputs permits more requests to share decode steps, leading to increased contention relative to the reasoning-on case. + +#### RagBattlePacket + +![RagBattlePacket — H100 Performance](assets/perf-benchmarks/ragbattlepacket_h100_performance.png) + +- VLM-Reasoning-On and LLM-Reasoning-On converge at the highest TTFT values, both exceeding 140s at concurrency=125. +- For VLM configurations on RagBattlePacket, VLM-enabled ingestion produces chunks with `content_metadata.type = "image"` and `"structured"` in addition to plain text. At query time, the pipeline fetches thumbnails from MinIO and constructs a multimodal prompt (text + images) sent to VLM nano. This image processing overhead — MinIO fetch, base64 PNG encoding, and a larger multimodal prefill — adds directly to TTFT per request. +- LLM-Reasoning-Off records substantially lower TTFT than both VLM configurations. Default ingestion produces text-only chunks and `enable_vlm_inference=False` means no image processing pipeline is triggered — generation begins immediately on a text-only prompt. +- LLM-Reasoning-On records the lowest ITL, remaining near-flat below 20ms throughout. +- VLM-Reasoning-Off records the highest ITL, plateauing near 360ms from concurrency=25 onwards — the steepest plateau observed across all datasets. 
+- The rapid ITL rise at low concurrency (concurrency=10 to 25) followed by a plateau indicates the system reaches maximum batch occupancy early, after which the scheduler begins queuing rather than further expanding the active batch. +- VLM-Reasoning-On records approximately 6× lower ITL than VLM-Reasoning-Off on this dataset. Both VLM modes go through the same image processing pipeline, so the delta is driven entirely by output length. VLM-Reasoning-Off produces short outputs — the 12B model's small KV cache already allows a large active batch, and short per-request lifetimes keep that batch continuously full, sustaining high decode contention. When reasoning is enabled, each request generates a long chain-of-thought over the visually complex tax documents before producing its final answer, holding each request in the active batch for a far longer duration, preventing the scheduler from admitting new requests and shrinking the effective batch size significantly. The magnitude of the drop is amplified by RagBattlePacket's document complexity, which elicits longer reasoning chains than simpler text corpora. + +#### HotPotQA + +![HotPotQA — H100 Performance](assets/perf-benchmarks/hotpotqa_h100_performance.png) + +- LLM-Reasoning-On produces the highest p95 TTFT of all datasets, exceeding 250s at concurrency=125. + - Reason: HotPotQA's multi-hop questions require chaining facts across multiple source documents, which elicits longer reasoning chains from a thinking-enabled model. Each request holds a large KV cache slot (49B model) for an extended duration, accelerating HBM saturation at scale. +- VLM-Reasoning-Off records the lowest TTFT. + - Reason: HotPotQA source documents are plain Wikipedia text containing no tables or figures, so VLM ingestion does not inflate retrieved context. The model-size advantage of VLM nano translates directly into lower KV cache pressure and reduced queuing latency. 
+- VLM-Reasoning-Off again records the highest ITL, rising continuously to approximately 330ms at concurrency=125. This indicates the system has not yet reached batch saturation at concurrency=125 on this dataset, consistent with HotPotQA's short Wikipedia paragraph chunks producing compact retrieved contexts that allow the VLM nano model to continue accepting additional concurrent requests at the highest tested concurrency. +- LLM-Reasoning-On remains the lowest ITL configuration, near-flat below 25ms. + +#### BO767 + +![BO767 — H100 Performance](assets/perf-benchmarks/bo767_h100_performance.png) + +- On BO767, LLM-Reasoning-Off achieves lower TTFT than VLM-Reasoning-Off, even though LLM-49B is the larger model. This reversal is driven by the additional image processing pipeline overhead (thumbnail fetch and encoding) that only VLM configurations incur on this visually dense corpus. +- LLM-Reasoning-On records the highest TTFT, reaching approximately 165s at concurrency=125, driven by the 49B model's large per-request KV cache and extended request lifetimes from reasoning-chain generation. +- VLM configurations carry additional per-request TTFT overhead from the image processing pipeline. The BO767 VLM-ingested index contains 45,819 image chunks and 31,030 structured chunks (49.2% of total records). When these chunk types appear in the retrieved top-10, the pipeline fetches thumbnails from MinIO, encodes them to base64 PNG, and sends a multimodal prompt to VLM nano — adding latency before generation begins on every affected request. +- LLM configurations are immune to this overhead regardless of index content. With `enable_vlm_inference=False`, the query-time pipeline performs text-only generation with no MinIO fetch, explaining why LLM-Reasoning-Off achieves lower TTFT than VLM-Reasoning-Off despite being the larger model. 
+- VLM-Reasoning-Off ITL rises to approximately 380ms by concurrency=50 and then holds a sustained plateau through concurrency=125 — indicating the system reaches maximum batch occupancy early on this corpus. +- LLM-Reasoning-Off plateaus at approximately 40ms, well below VLM-Reasoning-Off, consistent with the 49B model's larger per-request KV cache limiting the concurrent batch size. +- LLM-Reasoning-On records the lowest ITL, near-flat below 25ms across all concurrency levels. + +### Cross-Dataset Patterns + +**TTFT Ordering (Time to First Token)** — Across all four datasets, the following TTFT ordering holds consistently: + +- LLM-Reasoning-On produces the highest or joint-highest TTFT, driven by the combination of a large per-request KV cache and long request lifetimes from reasoning-chain generation. +- VLM-Reasoning-Off produces the lowest or joint-lowest TTFT, benefiting from the VLM nano model's small KV cache footprint and the absence of reasoning-chain latency. +- The relative ordering of LLM-Reasoning-Off vs. VLM-Reasoning-Off depends on corpus visual content. On visually dense corpora (RagBattlePacket, BO767), VLM-enabled ingestion produces image and structured chunks that trigger per-request image processing overhead at query time (MinIO thumbnail fetch, base64 encoding, multimodal prompt construction), adding directly to TTFT for VLM configurations. LLM configurations bypass this pipeline entirely, achieving lower TTFT despite the larger model. On text-dominant corpora (HotPotQA, KG-RAG), no image processing is triggered and the 12B model's smaller KV cache footprint gives VLM-Reasoning-Off the lower TTFT. + +**ITL Ordering (Inter-Token Latency)** — Across all datasets, the ITL ordering is fully consistent: + +| Rank | Configuration | Mechanism | +|------|--------------|-----------| +| **Highest ITL** | VLM-Reasoning-Off | Small 12B model + short outputs = maximum concurrent requests in HBM = highest batch contention per decode step. 
| +| **2nd** | LLM-Reasoning-Off | Large 49B model limits concurrency, but short outputs allow moderate batch occupancy. | +| **3rd** | VLM-Reasoning-On | Reasoning chain extends output length, reducing concurrent requests in HBM vs. reasoning-off. | +| **Lowest ITL** | LLM-Reasoning-On | Large 49B model + very long reasoning-chain outputs = minimum concurrent requests in HBM = lowest batch contention. | + +**ITL Plateau Behavior** — VLM-Reasoning-Off ITL plateaus or slightly declines at very high concurrency on several datasets (RagBattlePacket from c=25, BO767 from c=50). This reflects the onset of request queuing: once HBM is saturated, the scheduler queues incoming requests rather than expanding the active decode batch, which caps batch contention and prevents further ITL growth. + +### Synthetic Mode Results + +The purpose of this mode is to re-evaluate the latency difference between LLM and VLM in isolation, removing dataset-specific effects such as visual content and reasoning-chain variability. A Wikipedia dataset of 50,000 records is pre-ingested into the Vector Database, providing a sufficiently large and diverse text-only corpus for the retrieval stage to operate under realistic conditions. The same Helm chart configuration as dataset mode is applied. The workload is fixed at ISL=128, OSL=128, representing an ordinary conversational use case — short questions, short answers — where each request occupies minimal KV cache. + +The same hardware allocation as dataset mode is applied: 2 GPUs for the LLM/VLM model server and 1 GPU each for the reranker, embedding model, and vector database. 
+ +![Wikipedia — H100 LLM vs VLM (Reasoning Off)](assets/perf-benchmarks/wikipedia_synthetic_h100_performance.png) + +With reasoning disabled on both configurations and a uniform text-only corpus, the results expose the pure effect of model size on each metric: + +- **TTFT:** LLM-Reasoning-Off records marginally higher TTFT than VLM-Reasoning-Off across all concurrency levels, reaching approximately 65s vs. 60s at concurrency=125. Both curves rise linearly throughout the tested range, consistent with a chat-style workload where each request places minimal KV cache pressure. This confirms that for conversational workloads, both LLM and VLM configurations sustain responsive TTFT without entering a queuing collapse, and the model-size difference has negligible practical impact on user-perceived latency in this regime. The narrow gap between the two configurations reflects the fact that at these short sequence lengths, both models receive equally small inputs and produce equally short outputs — the KV cache size per request is nearly identical, leaving model parameter count as the only differentiator with a proportionally small impact. + +- **ITL:** The two configurations diverge sharply. LLM-Reasoning-Off ITL plateaus and remains flat at approximately 40ms from concurrency=25 onwards, indicating the decode batch has reached its HBM capacity and the scheduler has begun queuing excess requests beyond that point. VLM-Reasoning-Off ITL rises steeply and continuously, reaching approximately 220ms at concurrency=125 with no plateau visible — the 12B model's small per-request KV cache allows the scheduler to keep admitting more concurrent requests into the active decode batch as concurrency grows, continuously increasing per-token contention. The 49B LLM model's larger KV cache footprint caps the effective batch size early, preventing further ITL growth beyond concurrency=25. 
+ +Taken together, the two metrics reveal two distinct saturation thresholds: decode batch saturation, which the LLM hits at low concurrency and is reflected in the ITL plateau, and full system TTFT saturation, which neither configuration reaches within the tested concurrency range for this chat-scale workload. This synthetic result serves as a clean baseline confirmation of the theoretical framework — on a text-only corpus with no image processing overhead, model size is the sole differentiating factor, producing the expected TTFT ordering (LLM slightly higher) and ITL ordering (VLM significantly higher at scale). + +### Cross-Dataset Latency with LLM-Reasoning-Off + +![LLM-Reasoning-Off — All Datasets (H100)](assets/perf-benchmarks/cross_dataset_llm_reasoning_off.png) + +In addition to per-dataset views, all four benchmarks plus the synthetic Wikipedia workload are aggregated into a single comparison using the same Llama-3.3-Nemotron-Super-49B configuration with reasoning disabled. This chart reports p95 Time To First Token (TTFT) and Inter-Token Latency (ITL) as concurrency increases on a single H100. + +Wikipedia synthetic serves as a lower bound: with fixed ISL=128 and OSL=128, no retrieval, and text-only inputs, it represents the lightest possible workload for the model. All real RAG datasets sit above this baseline. HotPotQA and KG-RAG show the highest TTFT and ITL, reflecting their heavier retrieved contexts (multi-hop Wikipedia reasoning and SEC 10-Q filings). RagBattlePacket falls in the middle, while BO767 exhibits the lowest ITL among the RAG datasets and closely tracks the Wikipedia ITL plateau. Its TTFT remains higher than Wikipedia's because real retrieval still adds pre-generation overhead. 
+ +Taken together, this cross-dataset view confirms that the latency behavior of the 49B LLM is consistent and predictable: for a fixed model and hardware configuration, TTFT increases roughly linearly with concurrency for every dataset, and datasets that supply more complex or extensive retrieved context exhibit proportionally higher TTFT and ITL than the synthetic baseline. + +## Key Takeaways + +**Model size governs the TTFT/ITL trade-off direction, but corpus visual content determines its magnitude.** On text-only corpora, the smaller VLM nano (12B) delivers lower TTFT than the larger LLM-49B due to its reduced KV cache footprint — but this advantage is fully reversed on visually dense corpora (BO767, RagBattlePacket), where the VLM image processing pipeline (MinIO thumbnail fetch, base64 encoding, multimodal prompt construction) adds per-request overhead that outweighs the model-size benefit. As a result, on visually dense corpora, LLM-Reasoning-Off achieves lower TTFT than VLM-Reasoning-Off despite being the larger model — the image processing overhead is the dominant factor, not model size. + +**Reasoning is a force multiplier on TTFT, not just an accuracy switch.** Enabling reasoning on LLM-49B produces the highest TTFT in nearly every dataset tested. The chain-of-thought is generated autoregressively before the first answer token is returned, extending request lifetime and occupying KV cache slots for longer — accelerating HBM saturation under concurrency. This is a system-level cost, not just a per-request latency cost. + +**TTFT and ITL pull in opposite directions by design.** A small model with short outputs lets the scheduler pack more requests in parallel — this keeps TTFT low but creates a large, contended decode batch that drives ITL up. A large model with long reasoning outputs does the opposite: it shrinks the active batch, keeping ITL low but consuming more HBM per request and causing queuing that raises TTFT. 
Across every dataset tested, the lowest-TTFT configuration always has the highest ITL, and vice versa. No single configuration optimizes both metrics simultaneously — configuration selection is a deliberate trade-off between response latency and decode throughput. + +**ITL plateau is the earliest signal of HBM saturation.** Before TTFT shows non-linear growth, ITL flattening reveals that the scheduler has already begun queuing requests and capping the decode batch. In the synthetic experiment, LLM ITL plateaus at concurrency=25 while TTFT is still rising linearly, consistent with a chat-scale workload (ISL=128, OSL=128) where each request is small and queuing remains well controlled. In this regime, both LLM and VLM configurations keep end-user response times within an acceptable band. + +## Related Topics + +- [RAG Accuracy Benchmarks](accuracy-benchmarks.md) +- [Evaluate Your NVIDIA RAG Blueprint System](evaluate.md) +- [Enable Reasoning in Nemotron LLM Models](enable-nemotron-thinking.md) +- [VLM-Based Inferencing in RAG](vlm.md) +- [Image Captioning Support](image_captioning.md) +- [Best Practices for Common Settings](accuracy_perf.md) diff --git a/docs/project.json b/docs/project.json index 66344b5d0..9b67aad99 100644 --- a/docs/project.json +++ b/docs/project.json @@ -1,4 +1,4 @@ { "name": "NVIDIA-RAG-blueprint", - "version": "2.4.0" + "version": "2.5.0" } \ No newline at end of file diff --git a/docs/python-client.md b/docs/python-client.md index 73b432eaf..5c9bc33ea 100644 --- a/docs/python-client.md +++ b/docs/python-client.md @@ -155,12 +155,12 @@ Verify all containers are running and healthy. ```output NAMES STATUS -nemoretriever-ranking-ms Up ... (healthy) +nemotron-ranking-ms Up ... (healthy) compose-page-elements-1 Up ... compose-nemoretriever-ocr-1 Up ... compose-graphic-elements-1 Up ... compose-table-structure-1 Up ... -nemoretriever-embedding-ms Up ... (healthy) +nemotron-embedding-ms Up ... (healthy) nim-llm-ms Up ... 
(healthy) ``` @@ -170,32 +170,32 @@ nim-llm-ms Up ... (healthy) `DEPLOYMENT_MODE = "cloud"` -2. Configure NV-Ingest to use NVIDIA hosted cloud APIs using the following hosted models. +2. Configure NeMo Retriever Library to use NVIDIA hosted cloud APIs using the following hosted models. - os.environ["OCR_HTTP_ENDPOINT"] = "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr" - os.environ["OCR_INFER_PROTOCOL"] = "http" os.environ["YOLOX_HTTP_ENDPOINT"] = ( - "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3" + "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3" ) - os.environ["YOLOX_INFER_PROTOCOL"] = "http" - os.environ["YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT"] = ( - "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-graphic-elements-v1" + "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-graphic-elements-v1" ) - os.environ["YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL"] = "http" - os.environ["YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT"] = ( - "https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-table-structure-v1" + "https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-table-structure-v1" ) os.environ["YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL"] = "http" -### Setup NVIDIA Ingest Runtime and Redis Service +### Setup NeMo Retriever Library Runtime and Redis Service -Use the following command to setup your NVIDIA Ingest Runtime and Redis Service. +Use the following command to setup your NeMo Retriever Library Runtime and Redis Service. 
`docker compose -f ../deploy/compose/docker-compose-ingestor-server.yaml up nv-ingest-ms-runtime redis -d` @@ -247,7 +247,7 @@ if DEPLOYMENT_MODE == "cloud": config_ingestor.llm.server_url = "" # Empty uses NVIDIA API catalog config_ingestor.summarizer.server_url = "" # Empty uses NVIDIA API catalog else: - config_ingestor.embeddings.server_url = "http://nemoretriever-embedding-ms:8000/v1" + config_ingestor.embeddings.server_url = "http://nemotron-embedding-ms:8000/v1" ingestor = NvidiaRAGIngestor(config=config_ingestor) ``` @@ -357,11 +357,11 @@ from nvidia_rag.utils.configuration import NvidiaRAGConfig # "server_url": "", # }, # "embeddings": { -# "model_name": "nvidia/llama-3.2-nv-embedqa-1b-v2", +# "model_name": "nvidia/llama-nemotron-embed-1b-v2", # "server_url": "https://integrate.api.nvidia.com/v1", # }, # "ranking": { -# "model_name": "nvidia/llama-3.2-nv-rerankqa-1b-v2", +# "model_name": "nvidia/llama-nemotron-rerank-1b-v2", # "server_url": "", # }, # }) diff --git a/docs/query_decomposition.md b/docs/query_decomposition.md index 8f346f847..b6826d668 100644 --- a/docs/query_decomposition.md +++ b/docs/query_decomposition.md @@ -26,7 +26,7 @@ Each subquery is processed independently to gather comprehensive context, which ## Accuracy Improvement Example -The following example that uses the [HotpotQA](https://hotpotqa.github.io/) dataset demonstrates the accuracy improvement from enabling query decomposition. +The following example, which uses the [Google Frames](https://huggingface.co/datasets/google/frames-benchmark) benchmark, demonstrates the accuracy improvement from enabling query decomposition. ```text Query: I am thinking of a Ancient Roman City. The city was destroyed by volcanic eruption. The eruption occurred in the year 79 AD. The volcano was a stratovolcano. Where was the session held where it was decided that the city would be named a UNESCO world heritage site? 
diff --git a/docs/readme.md b/docs/readme.md index ec8c5cf8e..acc20cd5e 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -113,6 +113,7 @@ After you deploy the RAG blueprint, you can customize it for your use cases. - Evaluation - [Evaluate Your NVIDIA RAG Blueprint System](evaluate.md) + - [RAG Accuracy Benchmarks](accuracy-benchmarks.md) - Governance @@ -147,5 +148,5 @@ After you deploy the RAG blueprint, you can customize it for your use cases. ## Blog Posts -- [NVIDIA NeMo Retriever Delivers Accurate Multimodal PDF Data Extraction 15x Faster](https://developer.nvidia.com/blog/nvidia-nemo-retriever-delivers-accurate-multimodal-pdf-data-extraction-15x-faster/) +- [NVIDIA NeMo Retriever Library Delivers Accurate Multimodal PDF Data Extraction 15x Faster](https://developer.nvidia.com/blog/nvidia-nemo-retriever-delivers-accurate-multimodal-pdf-data-extraction-15x-faster/) - [Finding the Best Chunking Strategy for Accurate AI Responses](https://developer.nvidia.com/blog/finding-the-best-chunking-strategy-for-accurate-ai-responses/) diff --git a/docs/release-notes.md b/docs/release-notes.md index 96c48a121..6dd8e6911 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -8,7 +8,40 @@ This documentation contains the release notes for [NVIDIA RAG Blueprint](readme. -## Release 2.4.0 (26-02-TBD) +## Release 2.5.0 (2026-03-17) + +This release introduces support for the Nemotron-super-3 model, updates NIMs to the latest versions, upgrades NV-Ingest, and adds continuous ingestion along with RTX 6000 MIG support. + +### Highlights + +This release includes the following key updates: + +- **Nemotron-super-3 model support.** You can now integrate the Nemotron-super-3 model by following the steps outlined in [Change the Inference or Embedding Model](change-model.md). 
+- **NIMs updated to latest versions.** + The following model updates are included: + - `nvidia/llama-3.2-nv-embedqa-1b-v2` → `nvidia/llama-nemotron-embed-1b-v2` + - `nvidia/llama-3.2-nv-rerankqa-1b-v2` → `nvidia/llama-nemotron-rerank-1b-v2` + - `nemoretriever-page-elements-v3` → `nemotron-page-elements-v3` + - `nemoretriever-graphic-elements-v1` → `nemotron-graphic-elements-v1` + - `nemoretriever-table-structure-v1` → `nemotron-table-structure-v1` + - `nvidia/llama-3.2-nemoretriever-1b-vlm-embed-v1` → `nvidia/llama-nemotron-embed-vl-1b-v2` +- Updated NV-Ingest to [version 26.1.2](https://github.com/NVIDIA/NeMo-Retriever/releases/tag/26.1.2). +- Added an example demonstrating the continuous ingestion pipeline. For more information, see [rag_event_ingest.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_event_ingest.ipynb). +- **Added MIG support for RTX 6000.** For details, refer to [MIG Deployment](mig-deployment.md) and use `values-mig-rtx6000.yaml` and `mig-config-rtx6000.yaml`. +- Added documentation for the experimental Nemotron-parse-only ingestion pipeline. This configuration allows you to perform extraction using only Nemotron Parse through NV-Ingest, without relying on OCR, page-elements, graphic-elements, or table-structure NIMs. For more information, refer to [nemotron-parse-extraction.md](nemotron-parse-extraction.md#experimental-nemotron-parse-only-extraction). +- Several bug fixes, including frontend CVE resolutions, improved multimodal content concatenation for VLM embeddings, enhanced VDB serialization for high-concurrency parallel ingestion, and updates to observability and NeMo Guardrails configurations. +- Added agentic skills support: the `rag-blueprint` skill enables AI coding assistants (Claude Code, Cursor, Codex, etc.) to deploy, configure, troubleshoot, and manage the RAG Blueprint autonomously. For details, refer to [RAG Blueprint Agent Skill](../skill-source/README.md). 
+- Added [accuracy benchmark results](accuracy-benchmarks.md) across seven public datasets (RagBattlepacket, KG-RAG, Financebench, DC767, HotPotQA, Google Frames, and Vidore), comparing LLM and VLM configurations with reasoning on/off. Benchmarks use the NVIDIA Answer Accuracy metric from RAGAS. + +### Fixed Known Issues + +The following known issues have been resolved in this release: + +- Addressed frontend CVEs. + +- Resolved VDB indexing issues during high-concurrency batch parallel ingestion by implementing VDB serialization. + +## Release 2.4.0 (2026-02-20) This release adds new features to the RAG pipeline for supporting agent workflows and enhances generations with VLMs augmenting multimodal input. @@ -16,10 +49,10 @@ This release adds new features to the RAG pipeline for supporting agent workflow This release contains the following key changes: -- Updated NIMs and code to support [NVIDIA Ingest 26.01 release](https://docs.nvidia.com/nemo/retriever/latest/extraction/releasenotes-nv-ingest/). +- Updated NIMs and code to support [NeMo Retriever Library 26.01 release](https://docs.nvidia.com/nemo/retriever/latest/extraction/releasenotes-nv-ingest/). - Added support for non-NIM models including OpenAI, models hosted on AWS and Azure, OSS models, and others. Supported through service-specific API keys. For details, refer to [Get an API Key](api-key.md). -- The RAG Blueprint now uses [nemoretriever-ocr-v1](https://build.nvidia.com/nvidia/nemoretriever-ocr-v1/modelcard) as the default OCR model. For details, refer to [NeMo Retriever OCR Configuration Guide](nemoretriever-ocr.md). -- The Vision-Language Model (VLM) inference feature now uses the model [nemotron-nano-12b-v2-vl](https://build.nvidia.com/nvidia/nemotron-nano-12b-v2-vl/modelcard). For details, refer to [VLM for Generation](vlm.md). +- The RAG Blueprint now uses [nemoretriever-ocr-v1](https://build.nvidia.com/nvidia/nemoretriever-ocr-v1/modelcard) as the default OCR model. 
For details, refer to [NeMo Retriever Library OCR Configuration Guide](nemoretriever-ocr.md). +- Improved VLM based generation support. The Vision-Language Model (VLM) inference feature now uses the model [nemotron-nano-12b-v2-vl](https://build.nvidia.com/nvidia/nemotron-nano-12b-v2-vl/modelcard). For details, refer to [VLM for Generation](vlm.md). - User interface improvements including catalog display, image and text query, and others. For details, refer to [User Interface](user-interface.md). - Added ingestion metrics endpoint support with OpenTelemetry (OTEL) for monitoring document uploads, elements ingested, and pages processed. For details, refer to [Observability](observability.md). - Support image and text as input query. For details, refer to [Multimodal Query Support](multimodal-query.md). @@ -40,7 +73,7 @@ This release contains the following key changes: - Shallow summarization support - Easy model switches and dedicated configurations - Ease of prompt changes -- Reserved field names `type`, `subtype`, and `location` for NV-Ingest exclusive use in metadata schemas. +- Reserved field names `type`, `subtype`, and `location` for NeMo Retriever Library exclusive use in metadata schemas. - Added support for [rag_library_lite_usage.ipynb](https://github.com/NVIDIA-AI-Blueprints/rag/blob/main/notebooks/rag_library_lite_usage.ipynb) which demonstrates containerless deployment of the NVIDIA RAG Python package in lite mode. - Added example showcasing [NeMo Agent Toolkit integration](https://github.com/NVIDIA/NeMo-Agent-Toolkit) with NVIDIA RAG. - Added [weighted hybrid search](hybrid_search.md#weighted-hybrid-search) support with configurable weights. @@ -77,7 +110,7 @@ The following are the known issues for the NVIDIA RAG Blueprint: - Optional features reflection and image captioning are not available in Helm-based deployment. - Currently, Helm-based deployment is not supported for [NeMo Guardrails](nemo-guardrails.md). 
- The Blueprint responses can have significant latency when using [NVIDIA API Catalog cloud hosted models](deploy-docker-nvidia-hosted.md). -- The accuracy of the pipeline is optimized for certain file types like `.pdf`, `.txt`, `.docx`. The accuracy may be poor for other file types supported by NV-Ingest, since image captioning is disabled by default. +- The accuracy of the pipeline is optimized for certain file types like `.pdf`, `.txt`, `.docx`. The accuracy may be poor for other file types supported by NeMo Retriever Library, since image captioning is disabled by default. - When updating model configurations in Kubernetes `values.yaml` (for example, changing from 70B to 8B models), the RAG UI automatically detects and displays the new model configuration from the backend. No container rebuilds are required - simply redeploy the Helm chart with updated values and refresh the UI to see the new model settings in the Settings panel. - The NeMo LLM microservice can take 5-6 minutes to start for every deployment. - B200 GPUs are not supported for the following advanced features. For these features, use H100 or A100 GPUs instead. diff --git a/docs/retrieval-only-deployment.md b/docs/retrieval-only-deployment.md index 3cfc5d30a..7f7f94475 100644 --- a/docs/retrieval-only-deployment.md +++ b/docs/retrieval-only-deployment.md @@ -88,11 +88,11 @@ Choose one of the following options based on your deployment preference. Instead of starting all NIMs, use the `text-embed` profile to start only the embedding and reranking services: ```bash -USERID=$(id -u) docker compose -f deploy/compose/nims.yaml up -d nemoretriever-ranking-ms nemoretriever-embedding-ms +USERID=$(id -u) docker compose -f deploy/compose/nims.yaml up -d nemotron-ranking-ms nemotron-embedding-ms ``` :::{note} -The `text-embed` profile starts only `nemoretriever-embedding-ms` and `nemoretriever-ranking-ms `, which is sufficient for retrieval operations. 
The LLM NIM (`nim-llm-ms`) is not started, saving significant GPU memory. +The `text-embed` profile starts only `nemotron-embedding-ms` and `nemotron-ranking-ms `, which is sufficient for retrieval operations. The LLM NIM (`nim-llm-ms`) is not started, saving significant GPU memory. ::: Wait for the services to become healthy: @@ -105,8 +105,8 @@ Expected output: ```output NAMES STATUS -nemoretriever-ranking-ms Up 5 minutes (healthy) -nemoretriever-embedding-ms Up 5 minutes (healthy) +nemotron-ranking-ms Up 5 minutes (healthy) +nemotron-embedding-ms Up 5 minutes (healthy) ``` #### Option B: NVIDIA-Hosted NIMs @@ -308,7 +308,7 @@ This is expected behavior in retrieval-only mode. The `/generate` endpoint requi Check the embedding NIM logs: ```bash -docker logs nemoretriever-embedding-ms +docker logs nemotron-embedding-ms ``` Ensure the model cache directory has proper permissions: diff --git a/docs/scripts/build_multiversion_docs.ps1 b/docs/scripts/build_multiversion_docs.ps1 new file mode 100644 index 000000000..4aa131798 --- /dev/null +++ b/docs/scripts/build_multiversion_docs.ps1 @@ -0,0 +1,165 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +<# +.SYNOPSIS + Build Sphinx HTML for multiple release lines into a single publish layout. 
+ +.DESCRIPTION + For each version, checks out git ref v{version} (tag) or release-v{version} (branch), + writes the canonical docs/versions1.json (so every build lists the same versions), + runs verify_doc_version_manifest.py, then sphinx-build into + docs/_build/multiversion/{version}/. + + Copies the same manifest to docs/_build/multiversion/versions1.json for the + version switcher when the site root is this folder. + + Requires a clean working tree unless -AllowDirty is used. + +.PARAMETER Versions + Semver strings without a leading v, e.g. 2.3.0, 2.4.0, 2.5.0 + +.PARAMETER CanonicalManifest + Path to the versions1.json to inject on every checkout (default: docs/versions1.json + from the working tree at script start — save a backup first if needed). + +.PARAMETER OutputRoot + Directory under docs/ that will contain per-version folders and root versions1.json + (default: docs/_build/multiversion). + +.EXAMPLE + .\docs\scripts\build_multiversion_docs.ps1 -Versions @('2.3.0','2.4.0','2.5.0') + +.EXAMPLE + .\docs\scripts\build_multiversion_docs.ps1 -DryRun +#> + +[CmdletBinding()] +param( + [Parameter(Position = 0)] + [string[]]$Versions = @('2.3.0', '2.4.0', '2.5.0'), + + [string]$CanonicalManifest = '', + + [string]$OutputRoot = '', + + [switch]$DryRun, + + [switch]$AllowDirty +) + +Set-StrictMode -Version Latest +$ErrorActionPreference = 'Stop' + +$RepoRoot = (Resolve-Path (Join-Path $PSScriptRoot '..\..')).Path +Set-Location $RepoRoot + +if (-not $OutputRoot) { + $OutputRoot = Join-Path $RepoRoot 'docs\_build\multiversion' +} else { + $OutputRoot = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($OutputRoot) +} + +if (-not $CanonicalManifest) { + $CanonicalManifest = Join-Path $RepoRoot 'docs\versions1.json' +} else { + $CanonicalManifest = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($CanonicalManifest) +} + +function Resolve-VersionGitRef { + param([string]$Version) + $tag = "v$Version" + git rev-parse -q 
--verify "refs/tags/$tag" 2>$null | Out-Null + if ($LASTEXITCODE -eq 0) { + return $tag + } + $branch = "release-v$Version" + git rev-parse -q --verify "refs/heads/$branch" 2>$null | Out-Null + if ($LASTEXITCODE -eq 0) { + return $branch + } + throw "No git tag '$tag' or branch '$branch' found for version $Version" +} + +if (-not $DryRun) { + $dirty = git status --porcelain + if ($dirty -and -not $AllowDirty) { + throw "Working tree is dirty. Commit or stash changes, or pass -AllowDirty." + } +} + +if (-not (Test-Path -LiteralPath $CanonicalManifest)) { + throw "Canonical manifest not found: $CanonicalManifest" +} + +$canonicalJson = [System.IO.File]::ReadAllText($CanonicalManifest) + +$origHead = git rev-parse HEAD + +try { + if (-not $DryRun) { + New-Item -ItemType Directory -Force -Path $OutputRoot | Out-Null + } + + foreach ($ver in $Versions) { + $ref = Resolve-VersionGitRef -Version $ver + $dest = Join-Path $OutputRoot $ver + + Write-Host "==> Version $ver <= ref $ref => $dest" -ForegroundColor Cyan + + if ($DryRun) { + continue + } + + git checkout $ref + [System.IO.File]::WriteAllText( + (Join-Path $RepoRoot 'docs\versions1.json'), + $canonicalJson, + [System.Text.UTF8Encoding]::new($false) + ) + + & uv run python docs/scripts/verify_doc_version_manifest.py + if ($LASTEXITCODE -ne 0) { + throw "verify_doc_version_manifest.py failed for $ver (ref $ref)" + } + + if (Test-Path -LiteralPath $dest) { + Remove-Item -LiteralPath $dest -Recurse -Force + } + New-Item -ItemType Directory -Force -Path $dest | Out-Null + + & uv run --group docs sphinx-build docs $dest + if ($LASTEXITCODE -ne 0) { + throw "sphinx-build failed for $ver" + } + } + + if (-not $DryRun) { + $rootManifest = Join-Path $OutputRoot 'versions1.json' + [System.IO.File]::WriteAllText( + $rootManifest, + $canonicalJson, + [System.Text.UTF8Encoding]::new($false) + ) + Write-Host "Wrote $rootManifest" -ForegroundColor Green + } +} +finally { + if (-not $DryRun) { + git checkout $origHead + Write-Host 
"Restored HEAD to $origHead" -ForegroundColor DarkGray + } +} + +Write-Host 'Done.' -ForegroundColor Green diff --git a/docs/scripts/build_multiversion_docs.sh b/docs/scripts/build_multiversion_docs.sh new file mode 100644 index 000000000..23643b35f --- /dev/null +++ b/docs/scripts/build_multiversion_docs.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Build Sphinx HTML for multiple release lines into docs/_build/multiversion/. +# See build_multiversion_docs.ps1 for behavior and options. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +cd "${REPO_ROOT}" + +DRY_RUN=0 +ALLOW_DIRTY=0 +VERSIONS=(2.3.0 2.4.0 2.5.0) +CANONICAL_MANIFEST="${REPO_ROOT}/docs/versions1.json" +OUTPUT_ROOT="${REPO_ROOT}/docs/_build/multiversion" + +usage() { + echo "Usage: $0 [--dry-run] [--allow-dirty] [--versions V1,V2,...] 
[--manifest PATH] [--output-root PATH]" >&2 + exit 1 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --dry-run) DRY_RUN=1; shift ;; + --allow-dirty) ALLOW_DIRTY=1; shift ;; + --versions) + IFS=',' read -r -a VERSIONS <<< "$2" + shift 2 + ;; + --manifest) CANONICAL_MANIFEST="$2"; shift 2 ;; + --output-root) OUTPUT_ROOT="$2"; shift 2 ;; + -h|--help) usage ;; + *) echo "Unknown option: $1" >&2; usage ;; + esac +done + +resolve_ref() { + local ver="$1" + local tag="v${ver}" + local branch="release-v${ver}" + if git rev-parse -q --verify "refs/tags/${tag}" >/dev/null 2>&1; then + echo "${tag}" + return + fi + if git rev-parse -q --verify "refs/heads/${branch}" >/dev/null 2>&1; then + echo "${branch}" + return + fi + echo "No git tag ${tag} or branch ${branch} for ${ver}" >&2 + return 1 +} + +if [[ "${DRY_RUN}" -eq 0 ]]; then + if [[ -n "$(git status --porcelain)" && "${ALLOW_DIRTY}" -eq 0 ]]; then + echo "Working tree is dirty. Commit or stash, or pass --allow-dirty." >&2 + exit 1 + fi +fi + +if [[ ! -f "${CANONICAL_MANIFEST}" ]]; then + echo "Canonical manifest not found: ${CANONICAL_MANIFEST}" >&2 + exit 1 +fi + +canonical_json="$(cat "${CANONICAL_MANIFEST}")" +orig_head="$(git rev-parse HEAD)" + +if [[ "${DRY_RUN}" -eq 0 ]]; then + mkdir -p "${OUTPUT_ROOT}" + trap 'git checkout "${orig_head}"' EXIT +fi + +for ver in "${VERSIONS[@]}"; do + ref="$(resolve_ref "${ver}")" + dest="${OUTPUT_ROOT}/${ver}" + echo "==> Version ${ver} <= ref ${ref} => ${dest}" + + if [[ "${DRY_RUN}" -ne 0 ]]; then + continue + fi + + git checkout "${ref}" + printf '%s' "${canonical_json}" >"${REPO_ROOT}/docs/versions1.json" + + uv run python docs/scripts/verify_doc_version_manifest.py + + rm -rf "${dest}" + mkdir -p "${dest}" + uv run --group docs sphinx-build docs "${dest}" +done + +if [[ "${DRY_RUN}" -eq 0 ]]; then + printf '%s' "${canonical_json}" >"${OUTPUT_ROOT}/versions1.json" + echo "Wrote ${OUTPUT_ROOT}/versions1.json" +fi + +echo "Done." 
diff --git a/docs/scripts/verify_doc_version_manifest.py b/docs/scripts/verify_doc_version_manifest.py new file mode 100644 index 000000000..6392064ed --- /dev/null +++ b/docs/scripts/verify_doc_version_manifest.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Validate docs/versions1.json and consistency with conf.py / project.json. + +Run from the repository root: + + uv run python docs/scripts/verify_doc_version_manifest.py + +Use before building and publishing documentation so the version switcher manifest +is well-formed and matches the current branch's declared release. +""" + +from __future__ import annotations + +import argparse +import ast +import json +import re +import sys +from pathlib import Path + + +def _docs_dir() -> Path: + return Path(__file__).resolve().parent.parent + + +def _read_release_from_conf(conf_path: Path) -> str: + tree = ast.parse(conf_path.read_text(encoding="utf-8")) + for node in ast.walk(tree): + if isinstance(node, ast.Assign): + for target in node.targets: + if isinstance(target, ast.Name) and target.id == "release": + value = node.value + if isinstance(value, ast.Constant) and isinstance( + value.value, str + ): + return value.value + raise ValueError(f'Could not find release = "..." 
string in {conf_path}') + + +def _validate_versions_payload(data: object) -> list[dict[str, object]]: + if not isinstance(data, list): + raise ValueError("versions1.json must be a JSON array") + rows: list[dict[str, object]] = [] + for i, item in enumerate(data): + if not isinstance(item, dict): + raise ValueError(f"Entry {i} must be an object") + rows.append(item) + return rows + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--docs-dir", + type=Path, + default=_docs_dir(), + help="Path to the docs/ folder (default: next to this script)", + ) + args = parser.parse_args() + docs = args.docs_dir.resolve() + versions_path = docs / "versions1.json" + conf_path = docs / "conf.py" + project_path = docs / "project.json" + + errors: list[str] = [] + + try: + payload = json.loads(versions_path.read_text(encoding="utf-8")) + rows = _validate_versions_payload(payload) + except (OSError, json.JSONDecodeError, ValueError) as e: + print(f"ERROR: {versions_path}: {e}", file=sys.stderr) + return 1 + + preferred_count = 0 + url_re = re.compile(r"^\.\./[0-9]+\.[0-9]+\.[0-9]+/$") + for i, row in enumerate(rows): + ver = row.get("version") + url = row.get("url") + if not isinstance(ver, str) or not ver.strip(): + errors.append(f"Entry {i}: missing or invalid 'version'") + if not isinstance(url, str) or not url_re.match(url): + errors.append( + f"Entry {i}: 'url' must look like '../M.m.p/' (got {url!r})" + ) + if row.get("preferred") is True: + preferred_count += 1 + elif "preferred" in row and row["preferred"] not in (False, None): + errors.append(f"Entry {i}: 'preferred' must be true or omitted") + + if preferred_count != 1: + errors.append( + f"Expected exactly one entry with 'preferred': true, got {preferred_count}" + ) + + try: + release = _read_release_from_conf(conf_path) + except (OSError, ValueError) as e: + errors.append(f"conf.py: {e}") + release = None + + proj_ver: str | None = None + try: + proj = 
json.loads(project_path.read_text(encoding="utf-8")) + if isinstance(proj, dict): + v = proj.get("version") + proj_ver = v if isinstance(v, str) else None + if proj_ver is None: + errors.append("project.json: missing top-level string 'version'") + except (OSError, json.JSONDecodeError) as e: + errors.append(f"project.json: {e}") + + if not errors and release is not None and proj_ver is not None: + if proj_ver != release: + errors.append( + f"docs/conf.py release ({release!r}) != docs/project.json " + f"version ({proj_ver!r}) — they should match for this branch" + ) + + if errors: + print(f"Validation failed for {versions_path}:", file=sys.stderr) + for msg in errors: + print(f" - {msg}", file=sys.stderr) + return 1 + + print(f"OK: {versions_path} ({len(rows)} versions)") + if release is not None: + print(f"OK: conf.py release and project.json version both {release!r}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/docs/service-port-gpu-reference.md b/docs/service-port-gpu-reference.md index 648d1bd32..ed24b39f2 100644 --- a/docs/service-port-gpu-reference.md +++ b/docs/service-port-gpu-reference.md @@ -13,23 +13,23 @@ The following table provides a comprehensive reference of all services, their po | RAG Server | `rag-server` | 8081 | 8081 | N/A (CPU) | Main RAG API endpoint | | Ingestor Server | `ingestor-server` | 8082 | 8082 | N/A (CPU) | Document ingestion API | | RAG Frontend | `rag-frontend` | 8090 | 3000 | N/A (CPU) | Web UI | -| NV-Ingest Runtime | `nv-ingest-ms-runtime` | 7670, 7671, 8265 | 7670, 7671, 8265 | N/A (CPU) | Main orchestrator (Ray dashboard: 8265) | +| NeMo Retriever Library Runtime | `nv-ingest-ms-runtime` | 7670, 7671, 8265 | 7670, 7671, 8265 | N/A (CPU) | Main orchestrator (Ray dashboard: 8265) | ## NIM Microservices | Service | Container Name | Host Port(s) | Container Port(s) | Default GPU ID | Environment Variable | Notes | 
|---------|---------------|--------------|-------------------|----------------|---------------------|-------| | LLM | `nim-llm-ms` | 8999 | 8000 | 1 | `LLM_MS_GPU_ID` | Main language model | -| Embedding | `nemoretriever-embedding-ms` | 9080 | 8000 | 0 | `EMBEDDING_MS_GPU_ID` | Text embeddings | +| Embedding | `nemotron-embedding-ms` | 9080 | 8000 | 0 | `EMBEDDING_MS_GPU_ID` | Text embeddings | | VLM Embedding | `nemotron-vlm-embedding-ms` | 9081 | 8000 | 0 | `VLM_EMBEDDING_MS_GPU_ID` | Vision-language embeddings (opt-in, profile: vlm-embed) | -| Ranking | `nemoretriever-ranking-ms` | 1976 | 8000 | 0 | `RANKING_MS_GPU_ID` | Reranking model | +| Ranking | `nemotron-ranking-ms` | 1976 | 8000 | 0 | `RANKING_MS_GPU_ID` | Reranking model | | VLM | `nemo-vlm-microservice` | 1977 | 8000 | 5 | `VLM_MS_GPU_ID` | Vision-language model (opt-in, profile: vlm-only, vlm-generation) | | Nemotron Parse | `compose-nemotron-parse-1` | 8015, 8016, 8017 | 8000, 8001, 8002 | 1 | `NEMOTRON_PARSE_MS_GPU_ID` | PDF parsing (opt-in, profile: nemotron-parse) | | RIVA ASR | `compose-audio-1` | 8021, 8022 | 50051, 9000 | 0 | `AUDIO_MS_GPU_ID` | Audio speech recognition (opt-in, profile: audio) | | Page Elements | `compose-page-elements-1` | 8000, 8001, 8002 | 8000, 8001, 8002 | 0 | `YOLOX_MS_GPU_ID` | Object detection for pages | | Graphic Elements | `compose-graphic-elements-1` | 8003, 8004, 8005 | 8000, 8001, 8002 | 0 | `YOLOX_GRAPHICS_MS_GPU_ID` | Graphics detection | | Table Structure | `compose-table-structure-1` | 8006, 8007, 8008 | 8000, 8001, 8002 | 0 | `YOLOX_TABLE_MS_GPU_ID` | Table structure detection | -| NeMo Retriever OCR | `compose-nemoretriever-ocr-1` | 8012, 8013, 8014 | 8000, 8001, 8002 | 0 | `OCR_MS_GPU_ID` | OCR service (default) | +| NeMo Retriever Library OCR | `compose-nemoretriever-ocr-1` | 8012, 8013, 8014 | 8000, 8001, 8002 | 0 | `OCR_MS_GPU_ID` | OCR service (default) | ## Vector Database and Infrastructure diff --git a/docs/support-matrix.md b/docs/support-matrix.md 
index 6344822e1..e0e2f88ae 100644 --- a/docs/support-matrix.md +++ b/docs/support-matrix.md @@ -78,8 +78,8 @@ The following are requirements and recommendations for the individual components - **LLM NIM (llama-3.3-nemotron-super-49b-v1.5)** – Refer to the [Support Matrix](https://docs.nvidia.com/nim/large-language-models/latest/supported-models.html#llama-3-3-nemotron-super-49b-v1-5). - **Embedding NIM (Llama-3.2-NV-EmbedQA-1B-v2 )** – Refer to the [Support Matrix](https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html#llama-3-2-nv-embedqa-1b-v2). - **Reranking NIM (llama-3_2-nv-rerankqa-1b-v2 )**: Refer to the [Support Matrix](https://docs.nvidia.com/nim/nemo-retriever/text-reranking/latest/support-matrix.html#llama-3-2-nv-rerankqa-1b-v2). -- **NeMo Retriever OCR (Default)**: Refer to the [Support Matrix](https://docs.nvidia.com/nim/ingestion/image-ocr/1.2.0/support-matrix.html). -- **NVIDIA NIM for Image OCR (baidu/paddleocr - Legacy)**: Refer to the [Support Matrix](https://docs.nvidia.com/nim/ingestion/table-extraction/latest/support-matrix.html#supported-hardware). +- **NVIDIA NIM for Image OCR (baidu/paddleocr)**: Refer to the [Support Matrix](https://docs.nvidia.com/nemo/retriever/latest/extraction/support-matrix/). 
+- **NeMo Retriever OCR**: Refer to the [Support Matrix](https://docs.nvidia.com/nemo/retriever/latest/extraction/support-matrix/). - **NVIDIA NIMs for Object Detection**: - NeMo Retriever Page Elements v3 [Support Matrix](https://docs.nvidia.com/nim/ingestion/object-detection/latest/support-matrix.html#nemo-retriever-page-elements-v3) - NeMo Retriever Graphic Elements v1 [Support Matrix](https://docs.nvidia.com/nim/ingestion/object-detection/latest/support-matrix.html#nemo-retriever-graphic-elements-v1) diff --git a/docs/text_only_ingest.md b/docs/text_only_ingest.md index 784c08978..c2a22afd6 100644 --- a/docs/text_only_ingest.md +++ b/docs/text_only_ingest.md @@ -19,7 +19,7 @@ You can enable text-only ingestion for the [NVIDIA RAG Blueprint](readme.md). Fo
Fo ```output NAMES STATUS - nemoretriever-ranking-ms Up 14 minutes (healthy) - nemoretriever-embedding-ms Up 14 minutes (healthy) + nemotron-ranking-ms Up 14 minutes (healthy) + nemotron-embedding-ms Up 14 minutes (healthy) nim-llm-ms Up 14 minutes (healthy) ``` @@ -70,7 +70,7 @@ In case you are [interacting with cloud hosted models](deploy-docker-nvidia-host export APP_EMBEDDINGS_SERVERURL="" export APP_LLM_SERVERURL="" export APP_RANKING_SERVERURL="" - export YOLOX_HTTP_ENDPOINT="https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v3" + export YOLOX_HTTP_ENDPOINT="https://ai.api.nvidia.com/v1/cv/nvidia/nemotron-page-elements-v3" export YOLOX_INFER_PROTOCOL="http" ``` ::: @@ -113,7 +113,7 @@ Additionally, ensure that **table extraction**, **chart extraction**, and **imag 2. Then use the modified [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) file in your Helm upgrade command: ```bash -helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprint/charts/nvidia-blueprint-rag-v2.4.0.tgz \ +helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvidia/blueprint/charts/nvidia-blueprint-rag-v2.5.0.tgz \ --username '$oauthtoken' \ --password "${NGC_API_KEY}" \ --values deploy/helm/nvidia-blueprint-rag/values.yaml \ @@ -131,9 +131,9 @@ helm upgrade --install rag -n rag https://helm.ngc.nvidia.com/nvstaging/blueprin ``` :::{important} -**Disabling NV-Ingest Components for GPU Resource Management:** +**Disabling NeMo Retriever Library Components for GPU Resource Management:** -If you disable any nv-ingest dependent services (such as `table_structure`, `graphic_elements`, `nemoretriever_ocr_v1`, etc.) 
to free up GPU resources for customization, you must set the `COMPONENTS_TO_READY_CHECK` parameter to an empty string in the `nv-ingest.envVars` section of your [values.yaml](../deploy/helm/nvidia-blueprint-rag/values.yaml) file: +If you disable any NeMo Retriever Library dependent services (such as `table_structure`, `graphic_elements`, `nemoretriever_ocr_v1`, etc.) to free up GPU resources for customization, you must set the `COMPONENTS_TO_READY_CHECK` parameter to an empty string in the `nv-ingest.envVars` section of your [values.yaml](../deploy/helm/nvidia-blueprint-rag/values.yaml) file: ```yaml nv-ingest: @@ -141,6 +141,6 @@ nv-ingest: COMPONENTS_TO_READY_CHECK: "" ``` -This ensures the nv-ingest pod reaches ready state even when some dependent components are disabled. Without this setting, the nv-ingest pod will wait indefinitely for the disabled components to become ready. +This ensures the NeMo Retriever Library pod reaches ready state even when some dependent components are disabled. Without this setting, the NeMo Retriever Library pod will wait indefinitely for the disabled components to become ready. 
::: diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 319056bf0..782176ed2 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -87,10 +87,10 @@ During first-time deployments, large models are downloaded without visible progr docker logs -f nim-llm-ms # Monitor embedding service -docker logs -f nemoretriever-embedding-ms +docker logs -f nemotron-embedding-ms # Monitor ranking service -docker logs -f nemoretriever-ranking-ms +docker logs -f nemotron-ranking-ms ``` **Check disk usage to verify download progress:** @@ -105,7 +105,7 @@ watch -n 10 'du -sh ~/.cache/model-cache/' **Check container stats:** ```bash # View resource usage and verify containers are active -docker stats nim-llm-ms nemoretriever-embedding-ms nemoretriever-ranking-ms +docker stats nim-llm-ms nemotron-embedding-ms nemotron-ranking-ms ``` ### Kubernetes/Helm Deployments @@ -340,7 +340,7 @@ If the above error related to dependency conflicts are seen while building conta We've integrated VDB and embedding creation directly into the pipeline with caching included for expediency. However, in a production environment, it's better to use a separately managed VDB service. -NVIDIA offers optimized models and tools like NVIDIA NeMo Retriever ([build.nvidia.com/explore/retrieval](https://build.nvidia.com/explore/retrieval)) +NVIDIA offers optimized models and tools like NVIDIA NeMo Retriever Library ([build.nvidia.com/explore/retrieval](https://build.nvidia.com/explore/retrieval)) and cuVS ([github.com/rapidsai/cuvs](https://github.com/rapidsai/cuvs)). @@ -367,7 +367,7 @@ Adding this information may impact response accuracy, especially when partial in ## Helm Deployment Issues ### PVCs in Pending state (StorageClass issues) -If NIM Cache PVCs (e.g., `nemoretriever-embedding-ms-cache-pvc`) remain in `Pending` state, check if they are requesting a `storageClassName: default` that does not exist. 
+If NIM Cache PVCs (e.g., `nemotron-embedding-ms-cache-pvc`) remain in `Pending` state, check if they are requesting a `storageClassName: default` that does not exist. **Fix:** Ensure you have a default storage class. If using `local-path`, you can create an alias: ```yaml apiVersion: storage.k8s.io/v1 diff --git a/docs/versions1.json b/docs/versions1.json index d0731c374..2a3a5ee92 100644 --- a/docs/versions1.json +++ b/docs/versions1.json @@ -1,6 +1,10 @@ [ { "preferred": true, + "version": "2.5.0", + "url": "../2.5.0/" + }, + { "version": "2.4.0", "url": "../2.4.0/" }, diff --git a/docs/vlm-embed.md b/docs/vlm-embed.md index 5b9913232..01ab062a8 100644 --- a/docs/vlm-embed.md +++ b/docs/vlm-embed.md @@ -2,7 +2,7 @@ SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0 --> -# Use Multimodal (VLM) Embedding for Ingestion for NVIDIA RAG Blueprint (Early Access) +# Use Multimodal (VLM) Embedding for Ingestion for NVIDIA RAG Blueprint This guide shows how to enable and use the multimodal embedding model `nvidia/llama-nemotron-embed-vl-1b-v2` with the [NVIDIA RAG Blueprint](readme.md) ingestion pipeline. @@ -153,8 +153,8 @@ To deploy the VLM embedding service with Helm, update the image and model settin nvidia-nim-llama-nemotron-embed-vl-1b-v2: enabled: true image: - repository: nvcr.io/nvidia/nemo-microservices/llama-3.2-nemoretriever-1b-vlm-embed-v1 - tag: "1.7.0" + repository: nvcr.io/nim/nvidia/llama-nemotron-embed-vl-1b-v2 + tag: "1.12.0" # Optional: disable the default text embedding NIM nvidia-nim-llama-32-nv-embedqa-1b-v2: @@ -182,8 +182,6 @@ After modifying [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml) For detailed HELM deployment instructions, see [Helm Deployment Guide](deploy-helm.md). 
- - ## Additional Configuration: Extraction and Embedding Modalities To configure how content is extracted and embedded (similar to the Docker configurations shown above), you can add extraction and modality settings to your [`values.yaml`](../deploy/helm/nvidia-blueprint-rag/values.yaml): @@ -214,7 +212,7 @@ ingestor-server: nv-ingest: envVars: - # NV-Ingest runtime embedding target + # NeMo Retriever Library runtime embedding target EMBEDDING_NIM_ENDPOINT: "http://nemotron-vlm-embedding-ms:8000/v1" EMBEDDING_NIM_MODEL_NAME: "nvidia/llama-nemotron-embed-vl-1b-v2" ``` diff --git a/docs/vlm.md b/docs/vlm.md index 4a61b2f52..64c6af176 100644 --- a/docs/vlm.md +++ b/docs/vlm.md @@ -124,7 +124,7 @@ Continue with [Deploy with Docker (NVIDIA-Hosted Models)](deploy-docker-nvidia-h ## Enable VLM with Helm :::{note} -**GPU requirements for Helm**: VLM uses the same GPU normally assigned to LLM (GPU 1). With MIG slicing, assign a dedicated MIG slice to the VLM—see [mig-deployment.md](mig-deployment.md) and [values-mig.yaml](../deploy/helm/mig-slicing/values-mig.yaml). To run both VLM and LLM simultaneously, an additional GPU is required. +**GPU requirements for Helm**: VLM uses the same GPU normally assigned to LLM (GPU 1). With MIG slicing, assign a dedicated MIG slice to the VLM—see [mig-deployment.md](mig-deployment.md) and [values-mig-h100.yaml](../deploy/helm/mig-slicing/values-mig-h100.yaml) or [values-mig-rtx6000.yaml](../deploy/helm/mig-slicing/values-mig-rtx6000.yaml). To run both VLM and LLM simultaneously, an additional GPU is required. ::: 1. In [values.yaml](../deploy/helm/nvidia-blueprint-rag/values.yaml), under the `rag-server` `envVars` section, set: diff --git a/examples/README.md b/examples/README.md index 0d56c4781..b2e991ca6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,6 +8,8 @@ This directory contains example integrations and extensions for NVIDIA RAG. 
|---------|-------------|---------------| | [rag_react_agent](./rag_react_agent/) | Integration with [NeMo Agent Toolkit (NAT)](https://github.com/NVIDIA/NeMo-Agent-Toolkit) providing RAG query and search capabilities for agent workflows | [README](./rag_react_agent/README.md) | | [nvidia_rag_mcp](./nvidia_rag_mcp/) | MCP (Model Context Protocol) server and client for exposing NVIDIA RAG capabilities to MCP-compatible applications | [Documentation](../docs/mcp.md) | +| [rag_event_ingest](./rag_event_ingest/) | Automated document ingestion from object storage (MinIO) via Kafka | [Notebook](../notebooks/rag_event_ingest.ipynb) | +| [google-cloud-netapp-volumes-data-ingestor](./google-cloud-netapp-volumes-data-ingestor/) | Helm chart for deploying the GCNV data ingestor with PVC-backed storage and configurable runtime settings | [README](./google-cloud-netapp-volumes-data-ingestor/README.md) | ## rag_react_agent @@ -27,3 +29,22 @@ This example provides an MCP server and client that exposes NVIDIA RAG and Inges - Manage collections and documents in the vector database See the [MCP documentation](../docs/mcp.md) for detailed setup and usage instructions. + +## rag_event_ingest + +This example deploys an event-driven ingestion pipeline that monitors MinIO object storage for new file uploads via Kafka events. Documents are automatically indexed through the RAG Ingestor and become queryable through the RAG Agent. + +Components: +- **kafka_consumer/** - Event-driven consumer that routes files to RAG based on file type +- **deploy/** - Docker Compose for Kafka, MinIO, and the consumer +- **data/** - Sample documents for testing + +See the [notebook](../notebooks/rag_event_ingest.ipynb) for step-by-step deployment and testing. + +## google-cloud-netapp-volumes-data-ingestor + +This example packages a GCNV data ingestor deployment as a reusable Helm chart. 
It is intended for Kubernetes environments where application state and source data are mounted from PVCs, including Google Cloud NetApp Volumes-backed storage. + +The chart supports configurable image settings, PVC creation or reuse, health probes, service exposure, and runtime environment overrides for connecting to an NVIDIA ingestor endpoint. + +See the [google-cloud-netapp-volumes-data-ingestor README](./google-cloud-netapp-volumes-data-ingestor/README.md) for prerequisites, installation, and configuration details. diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/Chart.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/Chart.yaml new file mode 100644 index 000000000..6fa7cd8b9 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/Chart.yaml @@ -0,0 +1,6 @@ +apiVersion: v2 +name: gcnv-data-ingestor +description: Public Helm chart for the GCNV data ingestor deployment +type: application +version: 0.1.0 +appVersion: "0.1.0" diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/README.md b/examples/google-cloud-netapp-volumes-data-ingestor/README.md new file mode 100644 index 000000000..50c0fd09c --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/README.md @@ -0,0 +1,178 @@ +# Google Cloud NetApp Volumes (GCNV) Data Ingestor Helm Chart + +This chart packages the deployment of the GCNV Data Ingestor, which integrates with the NVIDIA Foundational RAG pipeline, as a reusable Helm chart located at `examples/google-cloud-netapp-volumes-data-ingestor`. + +Create or target the namespace externally with `--namespace ... --create-namespace`. Chart-managed namespace creation is intentionally not supported because Helm cannot reliably create the release namespace from within the same chart. + +## Prerequisites + +Before installing this chart, make sure the cluster can provision or expose the required PVCs from Google Cloud NetApp Volumes (GCNV). + +1. 
Install and configure NetApp Trident in the target cluster. +2. Create or use a Trident `StorageClass` that maps to your GCNV backend. +3. Decide how you want the chart to get storage: + - Let the chart create PVCs by setting `appData.storageClassName` and `sourceData.storageClassName` to Trident-backed classes. + - Or create the PVCs ahead of time with Trident and set `appData.create=false`, `appData.existingClaim=<app-claim-name>`, `sourceData.create=false`, and `sourceData.existingClaim=<source-claim-name>`. + - If you set `create=false`, the matching `existingClaim` is required. +4. Make sure the Docker Hub image and tag you want to deploy are available. +5. If the Docker Hub repository is private, create an image pull secret in the target namespace and set `image.pullSecrets`. + +## Chart Layout + +```text +examples/google-cloud-netapp-volumes-data-ingestor/ +├── Chart.yaml +├── values.yaml +├── values.schema.json +├── README.md +└── templates/ + ├── _helpers.tpl + ├── deployment.yaml + ├── pvc.yaml + ├── service.yaml + ├── validate.yaml + └── serviceaccount.yaml +``` + +## Important Values + +Update these values before install: + +- `image.repository`: set to your Docker Hub image path +- `image.tag`: set to the image tag you want to deploy +- `appData.storageClassName`: set to your Trident-backed app PVC class when the chart creates the PVC +- `appData.size`: app PVC size request, defaults to `50Gi` +- `appData.existingClaim`: use an already-created PVC instead of letting the chart create one; required when `appData.create=false` +- `sourceData.storageClassName`: set to your Trident-backed GCNV source PVC class when the chart creates the PVC +- `sourceData.size`: source PVC size request, defaults to `200Gi` +- `sourceData.existingClaim`: use an already-created source PVC instead of letting the chart create one; required when `sourceData.create=false` +- `env.nvIngestEndpoint`: set to the reachable NVIDIA ingestor-server `/v1` base URL + +The chart validates required values during `helm lint`, 
`helm template`, `helm install`, and `helm upgrade`. + +## Install + +You can either edit `values.yaml` directly or use an override file. + +Example override file: + +```yaml +image: + repository: docker.io/acme/netapp_volumes_rag_ingestor + tag: "REPLACE_WITH_REAL_TAG" + +appData: + storageClassName: trident-app + +sourceData: + storageClassName: trident-gcnv + +env: + nvIngestEndpoint: http://YOUR_INGESTOR_SERVER:8082/v1 +``` + +Install with: + +```bash +helm install gcnv-data-ingestor ./examples/google-cloud-netapp-volumes-data-ingestor \ + --namespace gcnv-data-ingestor \ + --create-namespace \ + -f my-values.yaml +``` + +If your Docker Hub repository is private, add an image pull secret. `image.pullSecrets` must be a YAML list of secret names: + +```yaml +image: + pullSecrets: + - dockerhub-secret +``` + +## Common Overrides + +Resize the chart-managed PVCs: + +```yaml +appData: + size: 100Gi + storageClassName: trident-app + +sourceData: + size: 500Gi + storageClassName: trident-gcnv +``` + +## Use Existing PVCs + +If Trident or another workflow already created the claims you want to mount, use overrides like this: + +```yaml +appData: + create: false + existingClaim: gcnv-ingestor-config-data + +sourceData: + create: false + existingClaim: gcnv-data-for-rag +``` + +The chart will mount those existing claims into the Pod instead of creating new PVCs. + +If you set `create: false` and leave `existingClaim` empty, the chart now fails fast during Helm validation instead of creating a broken release. 
+ +Expose the service differently: + +```yaml +service: + type: ClusterIP + port: 8000 +``` + +Tune runtime resources: + +```yaml +resources: + requests: + cpu: 1 + memory: 2Gi + limits: + cpu: 4 + memory: 8Gi +``` + +Pass extra environment variables using normal Kubernetes `env` list syntax: + +```yaml +env: + extra: + - name: EXTRA_FLAG + value: "1" +``` + +## Supported Values + +The chart supports overrides for the following areas in `values.yaml`: + +- Naming: `nameOverride`, `fullnameOverride` +- Labels: `selectorLabels`, `podLabels`, `podAnnotations` +- Deployment: `replicaCount`, `strategy` +- Image: `image.repository`, `image.tag`, `image.pullPolicy`, `image.pullSecrets` +- Service account: `serviceAccount.create`, `serviceAccount.name`, `serviceAccount.automount`, `serviceAccount.annotations` +- Service: `service.type`, `service.port`, `service.annotations` +- App PVC: `appData.create`, `appData.existingClaim`, `appData.name`, `appData.accessModes`, `appData.size`, `appData.storageClassName`, `appData.mountPath` +- Source PVC: `sourceData.create`, `sourceData.existingClaim`, `sourceData.name`, `sourceData.accessModes`, `sourceData.size`, `sourceData.storageClassName`, `sourceData.mountPath`, `sourceData.readOnly` +- Environment: `env.scanOutputRoot`, `env.appDbPath`, `env.defaultIncrementalSchedulerMins`, `env.nvIngestMode`, `env.nvIngestEndpoint`, `env.extra` +- Health checks: `probes.liveness.*`, `probes.readiness.*` +- Scheduling and placement: `nodeSelector`, `tolerations`, `affinity` +- Resource limits: `resources` + +## Verify + +```bash +helm template gcnv-data-ingestor ./examples/google-cloud-netapp-volumes-data-ingestor -n gcnv-data-ingestor +kubectl get pods,svc,pvc -n gcnv-data-ingestor +``` + +The service defaults to `NodePort` with service port `8000`, matching the source manifest. Kubernetes assigns the external node port automatically unless you customize the Service separately. 
+ +The default PVC access modes are `ReadWriteOnce`, so increasing `replicaCount` beyond `1` may require different storage semantics or pod placement constraints. diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/_helpers.tpl b/examples/google-cloud-netapp-volumes-data-ingestor/templates/_helpers.tpl new file mode 100644 index 000000000..60cb834ba --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/_helpers.tpl @@ -0,0 +1,80 @@ +{{/* +Expand the chart name. +*/}} +{{- define "gcnv-data-ingestor.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +*/}} +{{- define "gcnv-data-ingestor.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Chart name and version. +*/}} +{{- define "gcnv-data-ingestor.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels. +*/}} +{{- define "gcnv-data-ingestor.labels" -}} +helm.sh/chart: {{ include "gcnv-data-ingestor.chart" . }} +{{ include "gcnv-data-ingestor.selectorLabels" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end -}} + +{{/* +Selector labels copied from the source deployment. Values are quoted so that label values resembling numbers or booleans still render as YAML strings. +*/}} +{{- define "gcnv-data-ingestor.selectorLabels" -}} +app.kubernetes.io/name: {{ .Values.selectorLabels.name | quote }} +app.kubernetes.io/instance: {{ .Values.selectorLabels.instance | quote }} +{{- end -}} + +{{/* +Service account name. 
+*/}} +{{- define "gcnv-data-ingestor.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} +{{- default (printf "%s-sa" (include "gcnv-data-ingestor.fullname" .)) .Values.serviceAccount.name -}} +{{- else -}} +{{- default "default" .Values.serviceAccount.name -}} +{{- end -}} +{{- end -}} + +{{/* +App PVC name. +*/}} +{{- define "gcnv-data-ingestor.appPvcName" -}} +{{- if .Values.appData.existingClaim -}} +{{- .Values.appData.existingClaim -}} +{{- else -}} +{{- .Values.appData.name -}} +{{- end -}} +{{- end -}} + +{{/* +Source PVC name. +*/}} +{{- define "gcnv-data-ingestor.sourcePvcName" -}} +{{- if .Values.sourceData.existingClaim -}} +{{- .Values.sourceData.existingClaim -}} +{{- else -}} +{{- .Values.sourceData.name -}} +{{- end -}} +{{- end -}} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/deployment.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/templates/deployment.yaml new file mode 100644 index 000000000..c1fbec3d9 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/deployment.yaml @@ -0,0 +1,99 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "gcnv-data-ingestor.fullname" . }} + labels: + {{- include "gcnv-data-ingestor.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + strategy: + type: {{ .Values.strategy.type }} + selector: + matchLabels: + {{- include "gcnv-data-ingestor.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "gcnv-data-ingestor.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + serviceAccountName: {{ include "gcnv-data-ingestor.serviceAccountName" . }} + {{- with .Values.image.pullSecrets }} + imagePullSecrets: + {{- range . }} + - name: {{ . 
}} + {{- end }} + {{- end }} + containers: + - name: gcnv-data-ingestor + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + env: + - name: MOUNT_PATH + value: {{ .Values.sourceData.mountPath | quote }} + - name: SCAN_OUTPUT_ROOT + value: {{ .Values.env.scanOutputRoot | quote }} + - name: APP_DB_PATH + value: {{ .Values.env.appDbPath | quote }} + - name: DEFAULT_INCREMENTAL_SCHEDULER_MINS + value: {{ .Values.env.defaultIncrementalSchedulerMins | quote }} + - name: NV_INGEST_MODE + value: {{ .Values.env.nvIngestMode | quote }} + - name: NV_INGEST_ENDPOINT + value: {{ .Values.env.nvIngestEndpoint | quote }} + {{- with .Values.env.extra }} + {{- toYaml . | nindent 12 }} + {{- end }} + livenessProbe: + httpGet: + path: {{ .Values.probes.liveness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.liveness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.liveness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.liveness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.liveness.failureThreshold }} + readinessProbe: + httpGet: + path: {{ .Values.probes.readiness.path }} + port: http + initialDelaySeconds: {{ .Values.probes.readiness.initialDelaySeconds }} + periodSeconds: {{ .Values.probes.readiness.periodSeconds }} + timeoutSeconds: {{ .Values.probes.readiness.timeoutSeconds }} + failureThreshold: {{ .Values.probes.readiness.failureThreshold }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: app-data + mountPath: {{ .Values.appData.mountPath | quote }} + - name: source-data + mountPath: {{ .Values.sourceData.mountPath | quote }} + readOnly: {{ .Values.sourceData.readOnly }} + volumes: + - name: app-data + persistentVolumeClaim: + claimName: {{ include "gcnv-data-ingestor.appPvcName" . 
}} + - name: source-data + persistentVolumeClaim: + claimName: {{ include "gcnv-data-ingestor.sourcePvcName" . }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/pvc.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/templates/pvc.yaml new file mode 100644 index 000000000..590166056 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/pvc.yaml @@ -0,0 +1,35 @@ +{{- if and .Values.appData.create (not .Values.appData.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "gcnv-data-ingestor.appPvcName" . }} + labels: + {{- include "gcnv-data-ingestor.labels" . | nindent 4 }} +spec: + accessModes: + {{- toYaml .Values.appData.accessModes | nindent 4 }} + resources: + requests: + storage: {{ .Values.appData.size }} + {{- with .Values.appData.storageClassName }} + storageClassName: {{ . | quote }} + {{- end }} +{{- end }} +{{- if and .Values.sourceData.create (not .Values.sourceData.existingClaim) }} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "gcnv-data-ingestor.sourcePvcName" . }} + labels: + {{- include "gcnv-data-ingestor.labels" . | nindent 4 }} +spec: + accessModes: + {{- toYaml .Values.sourceData.accessModes | nindent 4 }} + resources: + requests: + storage: {{ .Values.sourceData.size }} + {{- with .Values.sourceData.storageClassName }} + storageClassName: {{ . 
| quote }} + {{- end }} +{{- end }} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/service.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/templates/service.yaml new file mode 100644 index 000000000..6d0ebbc36 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "gcnv-data-ingestor.fullname" . }} + labels: + {{- include "gcnv-data-ingestor.labels" . | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.service.type }} + selector: + {{- include "gcnv-data-ingestor.selectorLabels" . | nindent 4 }} + ports: + - name: http + port: {{ .Values.service.port }} + targetPort: http + protocol: TCP diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/serviceaccount.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/templates/serviceaccount.yaml new file mode 100644 index 000000000..67fe6bf5f --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "gcnv-data-ingestor.serviceAccountName" . }} + labels: + {{- include "gcnv-data-ingestor.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/templates/validate.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/templates/validate.yaml new file mode 100644 index 000000000..60f03ba4f --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/templates/validate.yaml @@ -0,0 +1,15 @@ +{{- if .Values.namespace.create }} +{{- fail "namespace.create is not supported by this chart. Create the namespace outside the chart with --create-namespace or kubectl create namespace." }} +{{- end }} + +{{- if and (not .Values.appData.create) (not .Values.appData.existingClaim) }} +{{- fail "appData.existingClaim is required when appData.create=false." }} +{{- end }} + +{{- if and (not .Values.sourceData.create) (not .Values.sourceData.existingClaim) }} +{{- fail "sourceData.existingClaim is required when sourceData.create=false." }} +{{- end }} + +{{- if and (not .Values.serviceAccount.create) (not .Values.serviceAccount.name) }} +{{- fail "serviceAccount.name is required when serviceAccount.create=false." 
}} +{{- end }} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/values.schema.json b/examples/google-cloud-netapp-volumes-data-ingestor/values.schema.json new file mode 100644 index 000000000..d61f87d29 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/values.schema.json @@ -0,0 +1,510 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "nameOverride": { + "type": "string" + }, + "fullnameOverride": { + "type": "string" + }, + "namespace": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + } + } + }, + "selectorLabels": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "instance": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "name", + "instance" + ] + }, + "replicaCount": { + "type": "integer", + "minimum": 1 + }, + "strategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "Recreate", + "RollingUpdate" + ] + } + }, + "required": [ + "type" + ] + }, + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string", + "minLength": 1 + }, + "tag": { + "type": "string", + "minLength": 1 + }, + "pullPolicy": { + "type": "string", + "enum": [ + "Always", + "IfNotPresent", + "Never" + ] + }, + "pullSecrets": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + }, + "required": [ + "repository", + "tag", + "pullPolicy", + "pullSecrets" + ] + }, + "serviceAccount": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "automount": { + "type": "boolean" + }, + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "create", + "name", + "automount", + "annotations" + ], + "allOf": [ + { + "if": { + "properties": { + "create": { + "const": false + } + }, + "required": [ + "create" + ] + }, + 
"then": { + "properties": { + "name": { + "minLength": 1 + } + } + } + } + ] + }, + "service": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ClusterIP", + "NodePort", + "LoadBalancer" + ] + }, + "port": { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "required": [ + "type", + "port", + "annotations" + ] + }, + "appData": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + }, + "existingClaim": { + "type": "string" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "accessModes": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "ReadWriteOnce", + "ReadOnlyMany", + "ReadWriteMany", + "ReadWriteOncePod" + ] + } + }, + "size": { + "type": "string", + "minLength": 1 + }, + "storageClassName": { + "type": "string" + }, + "mountPath": { + "type": "string", + "minLength": 1 + } + }, + "required": [ + "create", + "existingClaim", + "name", + "accessModes", + "size", + "storageClassName", + "mountPath" + ], + "allOf": [ + { + "if": { + "properties": { + "create": { + "const": false + } + }, + "required": [ + "create" + ] + }, + "then": { + "properties": { + "existingClaim": { + "minLength": 1 + } + } + } + }, + { + "if": { + "properties": { + "create": { + "const": true + }, + "existingClaim": { + "maxLength": 0 + } + }, + "required": [ + "create", + "existingClaim" + ] + }, + "then": { + "properties": { + "storageClassName": { + "type": "string", + "minLength": 1, + "pattern": "^[a-z0-9]([-a-z0-9.]*[a-z0-9])?$" + } + } + } + } + ] + }, + "sourceData": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + }, + "existingClaim": { + "type": "string" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "accessModes": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "enum": [ + "ReadWriteOnce", + 
"ReadOnlyMany", + "ReadWriteMany", + "ReadWriteOncePod" + ] + } + }, + "size": { + "type": "string", + "minLength": 1 + }, + "storageClassName": { + "type": "string" + }, + "mountPath": { + "type": "string", + "minLength": 1 + }, + "readOnly": { + "type": "boolean" + } + }, + "required": [ + "create", + "existingClaim", + "name", + "accessModes", + "size", + "storageClassName", + "mountPath", + "readOnly" + ], + "allOf": [ + { + "if": { + "properties": { + "create": { + "const": false + } + }, + "required": [ + "create" + ] + }, + "then": { + "properties": { + "existingClaim": { + "minLength": 1 + } + } + } + }, + { + "if": { + "properties": { + "create": { + "const": true + }, + "existingClaim": { + "maxLength": 0 + } + }, + "required": [ + "create", + "existingClaim" + ] + }, + "then": { + "properties": { + "storageClassName": { + "type": "string", + "minLength": 1, + "pattern": "^[a-z0-9]([-a-z0-9.]*[a-z0-9])?$" + } + } + } + } + ] + }, + "env": { + "type": "object", + "properties": { + "scanOutputRoot": { + "type": "string", + "minLength": 1 + }, + "appDbPath": { + "type": "string", + "minLength": 1 + }, + "defaultIncrementalSchedulerMins": { + "type": "string", + "pattern": "^[0-9]+$" + }, + "nvIngestMode": { + "type": "string", + "minLength": 1 + }, + "nvIngestEndpoint": { + "type": "string", + "minLength": 1, + "pattern": "^https?://.+" + }, + "extra": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "value": { + "type": "string" + }, + "valueFrom": { + "type": "object" + } + }, + "required": [ + "name" + ], + "anyOf": [ + { + "required": [ + "value" + ] + }, + { + "required": [ + "valueFrom" + ] + } + ] + } + } + }, + "required": [ + "scanOutputRoot", + "appDbPath", + "defaultIncrementalSchedulerMins", + "nvIngestMode", + "nvIngestEndpoint", + "extra" + ] + }, + "probes": { + "type": "object", + "properties": { + "liveness": { + "$ref": "#/definitions/httpProbe" + }, + 
"readiness": { + "$ref": "#/definitions/httpProbe" + } + }, + "required": [ + "liveness", + "readiness" + ] + }, + "resources": { + "type": "object" + }, + "podAnnotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "podLabels": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "nodeSelector": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "tolerations": { + "type": "array" + }, + "affinity": { + "type": "object" + } + }, + "required": [ + "image", + "serviceAccount", + "service", + "appData", + "sourceData", + "env", + "probes" + ], + "definitions": { + "httpProbe": { + "type": "object", + "properties": { + "path": { + "type": "string", + "minLength": 1 + }, + "initialDelaySeconds": { + "type": "integer", + "minimum": 0 + }, + "periodSeconds": { + "type": "integer", + "minimum": 1 + }, + "timeoutSeconds": { + "type": "integer", + "minimum": 1 + }, + "failureThreshold": { + "type": "integer", + "minimum": 1 + } + }, + "required": [ + "path", + "initialDelaySeconds", + "periodSeconds", + "timeoutSeconds", + "failureThreshold" + ] + } + } +} diff --git a/examples/google-cloud-netapp-volumes-data-ingestor/values.yaml b/examples/google-cloud-netapp-volumes-data-ingestor/values.yaml new file mode 100644 index 000000000..483b51672 --- /dev/null +++ b/examples/google-cloud-netapp-volumes-data-ingestor/values.yaml @@ -0,0 +1,90 @@ +nameOverride: "" +fullnameOverride: gcnv-data-ingestor + +namespace: + # Helm charts cannot reliably create their own target namespace during install. + # Use `helm install --create-namespace` instead. 
+ create: false + +selectorLabels: + name: nvidia-gcnv-rag-manager + instance: netapp-volumes-rag-ingestor + +replicaCount: 1 + +strategy: + type: Recreate + +image: + repository: "" + tag: "" + pullPolicy: IfNotPresent + pullSecrets: [] + +serviceAccount: + create: true + name: gcnv-data-ingestor-sa + automount: true + annotations: {} + +service: + type: NodePort + port: 8000 + annotations: {} + +appData: + create: true + existingClaim: "" + name: gcnv-ingestor-config-data + accessModes: + - ReadWriteOnce + size: 50Gi + storageClassName: "" + mountPath: /data + +sourceData: + create: true + existingClaim: "" + name: gcnv-data-for-rag + accessModes: + - ReadWriteOnce + size: 200Gi + storageClassName: "" + mountPath: /source + readOnly: true + +env: + scanOutputRoot: /data/scans + appDbPath: /data/state/app.db + defaultIncrementalSchedulerMins: "0" + nvIngestMode: ingestor + nvIngestEndpoint: "" + extra: [] + +probes: + liveness: + path: /healthz + initialDelaySeconds: 20 + periodSeconds: 15 + timeoutSeconds: 5 + failureThreshold: 3 + readiness: + path: /healthz + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + +resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "2" + memory: 4Gi + +podAnnotations: {} +podLabels: {} +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/examples/nvidia_rag_mcp/mcp_server.py b/examples/nvidia_rag_mcp/mcp_server.py index e93744933..c2ac626a4 100644 --- a/examples/nvidia_rag_mcp/mcp_server.py +++ b/examples/nvidia_rag_mcp/mcp_server.py @@ -33,6 +33,9 @@ Environment variables: - VITE_API_CHAT_URL: Base URL for RAG HTTP API (default http://localhost:8081) - INGESTOR_URL: Base URL for Ingestor API (default http://127.0.0.1:8082) + - MCP_UPLOAD_DIR: Allowed base directory for file uploads (default: cwd). + File paths passed to upload/update tools are validated to be within this + directory, preventing path-traversal attacks. 
""" import argparse @@ -68,6 +71,36 @@ def _rag_base_url() -> str: return os.environ.get("VITE_API_CHAT_URL", "http://localhost:8081").rstrip("/") +def _upload_base_dir() -> str: + """ + Return the base directory that file upload paths must reside within. + Controlled by the ``MCP_UPLOAD_DIR`` environment variable; defaults to + the current working directory when unset. + """ + return os.environ.get("MCP_UPLOAD_DIR", os.getcwd()) + + +def _validate_file_path(path: str) -> str: + """ + Resolve *path* to an absolute, canonical path and verify it is located + within the allowed upload directory (``MCP_UPLOAD_DIR``). + + Returns the resolved path on success; raises ``ValueError`` otherwise. + + Security: uses ``os.path.realpath`` to follow symlinks so that + ``../../etc/passwd`` or symlink escapes are caught. + """ + base = os.path.realpath(_upload_base_dir()) + resolved = os.path.realpath(path) + # Ensure the resolved path starts with the base directory + if not resolved.startswith(base + os.sep) and resolved != base: + raise ValueError( + f"Path {path!r} (resolved to {resolved!r}) is not within the " + f"allowed upload directory {base!r}" + ) + return resolved + + @server.tool( "generate", description="""Generate an answer using NVIDIA RAG (optionally with knowledge base). 
@@ -503,6 +536,7 @@ async def tool_update_documents( form_data = aiohttp.FormData() for path in file_paths or []: + path = _validate_file_path(path) try: if os.path.exists(path): with open(path, "rb") as f: @@ -818,6 +852,7 @@ async def tool_upload_documents( form_data = aiohttp.FormData() # Add files for path in file_paths or []: + path = _validate_file_path(path) try: if os.path.exists(path): with open(path, "rb") as f: diff --git a/examples/rag_event_ingest/data/documents/Seahawks-Patriots in Super Bowl LX_ What We Learned from Seattle's 29-13 win.pdf b/examples/rag_event_ingest/data/documents/Seahawks-Patriots in Super Bowl LX_ What We Learned from Seattle's 29-13 win.pdf new file mode 100644 index 000000000..3d750564d Binary files /dev/null and b/examples/rag_event_ingest/data/documents/Seahawks-Patriots in Super Bowl LX_ What We Learned from Seattle's 29-13 win.pdf differ diff --git a/examples/rag_event_ingest/data/videos/Seattle Seahawks vs New England Patriots - Super Bowl LX Game Highlights.mp4 b/examples/rag_event_ingest/data/videos/Seattle Seahawks vs New England Patriots - Super Bowl LX Game Highlights.mp4 new file mode 100644 index 000000000..164dc505a --- /dev/null +++ b/examples/rag_event_ingest/data/videos/Seattle Seahawks vs New England Patriots - Super Bowl LX Game Highlights.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618e8d92f34e1a7c3b5ea139b49bce1cf1d00eb4f15fd1963ee53ea8302f6c70 +size 83123435 diff --git a/examples/rag_event_ingest/deploy/docker-compose.yaml b/examples/rag_event_ingest/deploy/docker-compose.yaml new file mode 100644 index 000000000..05e5bbc0f --- /dev/null +++ b/examples/rag_event_ingest/deploy/docker-compose.yaml @@ -0,0 +1,164 @@ +# AIDP - AI Data Pipeline Docker Compose +# Event-driven document ingestion with Kafka + MinIO sources +# +# Usage: +# docker compose -f docker-compose.yaml up -d +# +# Prerequisites: +# - RAG stack running (from launchable.ipynb) +# - nvidia-rag network exists + 
services:
  # =============================================================================
  # KAFKA STACK (KRaft - no Zookeeper needed)
  # =============================================================================
  kafka:
    # NOTE(review): `latest` is unpinned; pin a tested tag for reproducible runs.
    image: apache/kafka:latest
    container_name: kafka
    restart: unless-stopped
    ports:
      - "9092:9092"
      - "9094:9094"
    environment:
      - KAFKA_NODE_ID=1
      - KAFKA_PROCESS_ROLES=broker,controller
      - KAFKA_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093,EXTERNAL://:9094
      # In-network clients use kafka:9092; host clients use HOST_IP:9094.
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092,EXTERNAL://${HOST_IP:-localhost}:9094
      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=CONTROLLER:PLAINTEXT,PLAINTEXT:PLAINTEXT,EXTERNAL:PLAINTEXT
      - KAFKA_CONTROLLER_QUORUM_VOTERS=1@kafka:9093
      - KAFKA_CONTROLLER_LISTENER_NAMES=CONTROLLER
      - KAFKA_INTER_BROKER_LISTENER_NAME=PLAINTEXT
      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR=1
      - KAFKA_TRANSACTION_STATE_LOG_MIN_ISR=1
      - KAFKA_AUTO_CREATE_TOPICS_ENABLE=true
      - KAFKA_LOG_RETENTION_HOURS=168
      - CLUSTER_ID=MkU3OEVBNTcwNTJENDM2Qk
    volumes:
      - kafka-data:/var/lib/kafka/data
    networks:
      - nvidia-rag
    healthcheck:
      test: ["CMD-SHELL", "/opt/kafka/bin/kafka-topics.sh --bootstrap-server localhost:9092 --list || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 5

  kafka-ui:
    # NOTE(review): `latest` is unpinned; pin a tested tag for reproducible runs.
    image: provectuslabs/kafka-ui:latest
    container_name: aidp-kafka-ui
    depends_on:
      kafka:
        condition: service_healthy
    environment:
      KAFKA_CLUSTERS_0_NAME: aidp-cluster
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:9092
    ports:
      - "8080:8080"
    networks:
      - nvidia-rag

  # =============================================================================
  # MINIO (Data Source)
  # =============================================================================
  minio-source-1:
    image: minio/minio:RELEASE.2024-01-18T22-51-28Z
    container_name: aidp-minio
    command: server /data --console-address ":9001"
    environment:
      # Root credentials come from the same variables the kafka-consumer uses
      # to log in, so overriding MINIO_ACCESS_KEY / MINIO_SECRET_KEY keeps the
      # server, the mc bootstrap and the consumer in agreement.
      MINIO_ROOT_USER: "${MINIO_ACCESS_KEY:-minioadmin}"
      MINIO_ROOT_PASSWORD: "${MINIO_SECRET_KEY:-minioadmin}"
      # Kafka notification configuration
      MINIO_NOTIFY_KAFKA_ENABLE_AIDP: "on"
      MINIO_NOTIFY_KAFKA_BROKERS_AIDP: "kafka:9092"
      # Must be the topic the kafka-consumer subscribes to (KAFKA_TOPIC).
      MINIO_NOTIFY_KAFKA_TOPIC_AIDP: "${KAFKA_TOPIC:-aidp-topic}"
    volumes:
      - minio-data:/data
    ports:
      - "9201:9000"
      - "9211:9001"
    networks:
      - nvidia-rag
    healthcheck:
      test: ["CMD", "mc", "ready", "local"]
      interval: 10s
      timeout: 5s
      retries: 5

  # MinIO MC for bucket setup
  minio-mc:
    # NOTE(review): `latest` is unpinned; pin a tested tag for reproducible runs.
    image: minio/mc:latest
    container_name: aidp-minio-mc
    depends_on:
      minio-source-1:
        condition: service_healthy
      kafka:
        condition: service_healthy
    # Compose substitutes the ${...} credentials before the shell runs; they
    # are single-quoted so the shell passes them through unmodified.
    # `mc event add` is allowed to fail (|| true) so that re-running against
    # an already-configured bucket does not abort the setup.
    entrypoint: >
      /bin/sh -c "
      echo 'Waiting for MinIO...';
      sleep 5;

      echo 'Setting up MinIO...';
      mc alias set minio http://minio-source-1:9000 '${MINIO_ACCESS_KEY:-minioadmin}' '${MINIO_SECRET_KEY:-minioadmin}';
      mc mb --ignore-existing minio/aidp-bucket;
      mc event add minio/aidp-bucket arn:minio:sqs::AIDP:kafka --event put,delete || true;

      echo 'MinIO setup complete!';
      echo 'Bucket: aidp-bucket on minio-source-1';

      echo 'Keeping container alive for mc commands...';
      tail -f /dev/null
      "
    networks:
      - nvidia-rag

  # =============================================================================
  # KAFKA CONSUMER (Event-driven Ingestion)
  # =============================================================================
  kafka-consumer:
    build:
      context: ../kafka_consumer
      dockerfile: Dockerfile
    image: kafka-consumer:local
    container_name: kafka-consumer
    depends_on:
      kafka:
        condition: service_healthy
      minio-source-1:
        condition: service_healthy
    environment:
      # Kafka
      - KAFKA_BOOTSTRAP_SERVERS=kafka:9092
      - KAFKA_TOPIC=${KAFKA_TOPIC:-aidp-topic}
      - CONSUMER_GROUP=${CONSUMER_GROUP:-nvingest-consumer-group}
      # MinIO
      - MINIO_ENDPOINT=minio-source-1:9000
      - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY:-minioadmin}
      - MINIO_SECRET_KEY=${MINIO_SECRET_KEY:-minioadmin}
      - MINIO_SECURE=false
      # RAG Ingestor
      - INGESTOR_SERVER_URL=${INGESTOR_SERVER_URL:-http://ingestor-server:8082}
      - COLLECTION_NAME=${COLLECTION_NAME:-aidp_bucket}
      # Logging
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
    restart: unless-stopped
    networks:
      - nvidia-rag

# =============================================================================
# VOLUMES
# =============================================================================
volumes:
  kafka-data:
    driver: local
  minio-data:
    driver: local

# =============================================================================
# NETWORKS
# =============================================================================
networks:
  # Created by the RAG stack; this file only attaches to it.
  nvidia-rag:
    external: true
    name: nvidia-rag
+ +Usage: + import config.settings as cfg + print(cfg.INGESTOR_SERVER_URL) + + from config.constants import DOCUMENT_EXTENSIONS, DEST_RAG +""" + +# Settings (env vars) +from .settings import ( + # Kafka + KAFKA_BOOTSTRAP_SERVERS, + KAFKA_CONSUMER_GROUP, + KAFKA_TOPIC, + KAFKA_AUTO_OFFSET_RESET, + KAFKA_MAX_POLL_RECORDS, + KAFKA_MAX_POLL_INTERVAL_MS, + KAFKA_SESSION_TIMEOUT_MS, + KAFKA_HEARTBEAT_INTERVAL_MS, + # Services + INGESTOR_SERVER_URL, + INGESTOR_TIMEOUT, + # MinIO + MINIO_ENDPOINT, + MINIO_ACCESS_KEY, + MINIO_SECRET_KEY, + MINIO_SECURE, + MINIO_DEFAULT_COLLECTION, + MINIO_SOURCES, + # Features + ENABLE_IMAGE_PROCESSING, + ENABLE_AUDIO_PROCESSING, + # Collection + EMBEDDING_DIMENSION, + CHUNK_SIZE, + CHUNK_OVERLAP, + # Logging + LOG_LEVEL, + LOG_FORMAT, + # History + HISTORY_FILE, + # API Endpoints (configurable via env) + API_INGESTOR_DOCUMENTS, + API_INGESTOR_COLLECTIONS, + API_INGESTOR_COLLECTION, + API_INGESTOR_STATUS, +) + +# Constants +from .constants import ( + # File extensions + DOCUMENT_EXTENSIONS, + IMAGE_EXTENSIONS, + AUDIO_EXTENSIONS, + SKIP_EXTENSIONS, + # Content types + CONTENT_TYPE_MAP, + DEFAULT_CONTENT_TYPE, + # Routing + DEST_RAG, + DEST_SKIP, + DEST_UNKNOWN, + # S3 Event fields + EVENT_NAME, + EVENT_RECORDS, + EVENT_S3, + EVENT_BUCKET, + EVENT_OBJECT, + EVENT_KEY, + EVENT_SIZE, + EVENT_ETAG, + EVENT_NAME_FIELD, + EVENT_FIRST_RECORD_INDEX, + EVENT_PREFIX_CREATED, + EVENT_PREFIX_REMOVED, + EVENT_TYPE_CREATE, + EVENT_TYPE_DELETE, + # Record field names (dataclass attributes) + FIELD_FILE_NAME, + FIELD_BUCKET, + FIELD_COLLECTION, + FIELD_STATUS, + FIELD_START_TIME, + FIELD_END_TIME, + FIELD_DURATION_SECONDS, + FIELD_ERROR_MESSAGE, + FIELD_TASK_ID, + # Record serialization output keys + RECORD_FILE_NAME, + RECORD_BUCKET, + RECORD_COLLECTION, + RECORD_START_TIME, + RECORD_END_TIME, + RECORD_DURATION, + RECORD_STATUS, + RECORD_ERROR, + RECORD_TASK_ID, + # Status + STATUS_PENDING, + STATUS_PROCESSING, + STATUS_FINISHED, + STATUS_FAILED, + 
STATUS_SKIPPED, + STATUS_DELETED, + STATUS_SUCCESS, + # Config keys (MinIO sources) + CFG_ENDPOINT, + CFG_ACCESS, + CFG_SECRET, + CFG_SECURE, + CFG_COLLECTION, + CFG_BUCKETS, + # API request fields (Ingestor) + FIELD_COLLECTION_NAME, + FIELD_BLOCKING, + FIELD_SPLIT_OPTIONS, + FIELD_CHUNK_SIZE, + FIELD_CHUNK_OVERLAP, + FIELD_GENERATE_SUMMARY, + FIELD_EMBEDDING_DIMENSION, + FIELD_TASK_ID, + # API response fields + RESP_MESSAGE, + RESP_ERROR, + RESP_COLLECTIONS, + RESP_TASK_ID, + RESP_STATE, + RESP_RESULT, + RESP_FAILED_DOCUMENTS, + RESP_VALIDATION_ERRORS, + # Timeouts + TIMEOUT_DEFAULT, + TIMEOUT_UPLOAD, + TIMEOUT_TASK_CHECK, + TIMEOUT_MAX_TASK_WAIT, + # Kafka defaults + KAFKA_DEFAULT_TOPIC, + KAFKA_DEFAULT_CONSUMER_GROUP, + KAFKA_DEFAULT_AUTO_OFFSET_RESET, + KAFKA_DEFAULT_MAX_POLL_RECORDS, + KAFKA_DEFAULT_MAX_POLL_INTERVAL_MS, + KAFKA_DEFAULT_SESSION_TIMEOUT_MS, + KAFKA_DEFAULT_HEARTBEAT_INTERVAL_MS, + # Collection defaults + COLLECTION_EMBEDDING_DIMENSION, + COLLECTION_CHUNK_SIZE, + COLLECTION_CHUNK_OVERLAP, +) diff --git a/examples/rag_event_ingest/kafka_consumer/config/constants.py b/examples/rag_event_ingest/kafka_consumer/config/constants.py new file mode 100644 index 000000000..050cf70c4 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/config/constants.py @@ -0,0 +1,296 @@ +# config/constants.py +"""Static constants that don't change at runtime. 
+ +For configurable values from environment, see settings.py +""" + +# ==================== File Extensions ==================== + +DOCUMENT_EXTENSIONS = frozenset({ + '.pdf', '.docx', '.doc', '.txt', '.md', '.rst', + '.html', '.htm', '.pptx', '.ppt', '.xlsx', '.xls', + '.csv', '.json', '.xml' +}) + +IMAGE_EXTENSIONS = frozenset({ + '.jpg', '.jpeg', '.png', '.gif', + '.webp', '.bmp', '.tiff', '.svg' +}) + +AUDIO_EXTENSIONS = frozenset({ + '.mp3', '.wav', '.flac', '.aac', + '.ogg', '.m4a', '.wma' +}) + +SKIP_EXTENSIONS = frozenset({ + '.tmp', '.log', '.bak', '.swp', '.DS_Store', + '.gitkeep', '.gitignore' +}) + + +# ==================== Content Types ==================== + +CONTENT_TYPE_MAP = { + # Documents + '.pdf': 'application/pdf', + '.txt': 'text/plain', + '.doc': 'application/msword', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.html': 'text/html', + '.htm': 'text/html', + '.xml': 'application/xml', + '.json': 'application/json', + '.csv': 'text/csv', + '.md': 'text/markdown', + '.rst': 'text/x-rst', + '.ppt': 'application/vnd.ms-powerpoint', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.xls': 'application/vnd.ms-excel', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + # Images + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.webp': 'image/webp', + '.bmp': 'image/bmp', + '.tiff': 'image/tiff', + '.svg': 'image/svg+xml', + # Audio + '.mp3': 'audio/mpeg', + '.wav': 'audio/wav', + '.flac': 'audio/flac', + '.aac': 'audio/aac', + '.ogg': 'audio/ogg', + '.m4a': 'audio/mp4', + '.wma': 'audio/x-ms-wma', +} + +DEFAULT_CONTENT_TYPE = 'application/octet-stream' + + +# ==================== Routing ==================== + +# Destinations +DEST_RAG = 'rag' +DEST_SKIP = 'skip' +DEST_UNKNOWN = 'unknown' + +# Route result keys +KEY_DESTINATION = 'destination' +KEY_FILE_TYPE = 'file_type' +KEY_EXTENSION = 
'extension' +KEY_REASON = 'reason' + +# File types +FILE_TYPE_DOCUMENT = 'document' +FILE_TYPE_IMAGE = 'image' +FILE_TYPE_AUDIO = 'audio' +FILE_TYPE_SKIP = 'skip' +FILE_TYPE_UNKNOWN = 'unknown' + +# Config keys +CFG_DOCUMENT_EXTENSIONS = 'document_extensions' +CFG_IMAGE_EXTENSIONS = 'image_extensions' +CFG_AUDIO_EXTENSIONS = 'audio_extensions' +CFG_SKIP_EXTENSIONS = 'skip_extensions' +CFG_ENABLE_IMAGE_PROCESSING = 'enable_image_processing' +CFG_ENABLE_AUDIO_PROCESSING = 'enable_audio_processing' + + +# ==================== S3 Event Fields ==================== + +# Kafka S3 event structure +EVENT_NAME = 'EventName' +EVENT_RECORDS = 'Records' +EVENT_FIRST_RECORD_INDEX = 0 # S3 events typically contain single record +EVENT_S3 = 's3' +EVENT_BUCKET = 'bucket' +EVENT_OBJECT = 'object' +EVENT_KEY = 'key' +EVENT_SIZE = 'size' +EVENT_ETAG = 'eTag' +EVENT_NAME_FIELD = 'name' + +# Event type prefixes +EVENT_PREFIX_CREATED = 's3:ObjectCreated:' +EVENT_PREFIX_REMOVED = 's3:ObjectRemoved:' + +# Event type values +EVENT_TYPE_CREATE = 'create' +EVENT_TYPE_DELETE = 'delete' + + +# ==================== Record Fields ==================== + +# IngestionRecord field names (dataclass attributes) +FIELD_FILE_NAME = 'file_name' +FIELD_BUCKET = 'bucket' +FIELD_COLLECTION = 'collection' +FIELD_STATUS = 'status' +FIELD_START_TIME = 'start_time' +FIELD_END_TIME = 'end_time' +FIELD_DURATION_SECONDS = 'duration_seconds' +FIELD_ERROR_MESSAGE = 'error_message' +FIELD_TASK_ID = 'task_id' + +# IngestionRecord serialization output keys +RECORD_FILE_NAME = FIELD_FILE_NAME +RECORD_BUCKET = FIELD_BUCKET +RECORD_COLLECTION = FIELD_COLLECTION +RECORD_START_TIME = FIELD_START_TIME +RECORD_END_TIME = FIELD_END_TIME +RECORD_DURATION = FIELD_DURATION_SECONDS +RECORD_STATUS = FIELD_STATUS +RECORD_ERROR = FIELD_ERROR_MESSAGE +RECORD_TASK_ID = FIELD_TASK_ID + + +# ==================== Task Status ==================== + +STATUS_PENDING = 'PENDING' +STATUS_PROCESSING = 'PROCESSING' +STATUS_FINISHED = 'FINISHED' 
+STATUS_FAILED = 'FAILED' +STATUS_SKIPPED = 'SKIPPED' +STATUS_DELETED = 'DELETED' +STATUS_SUCCESS = 'SUCCESS' + + +# ==================== Config Keys ==================== + +# MinIO/S3 source config keys +CFG_ENDPOINT = 'endpoint' +CFG_ACCESS = 'access' +CFG_SECRET = 'secret' +CFG_SECURE = 'secure' +CFG_COLLECTION = 'collection' +CFG_BUCKETS = 'buckets' + + +# ==================== API Request Fields ==================== + +# Ingestor request fields +FIELD_COLLECTION_NAME = 'collection_name' +FIELD_BLOCKING = 'blocking' +FIELD_SPLIT_OPTIONS = 'split_options' +FIELD_CHUNK_SIZE = 'chunk_size' +FIELD_CHUNK_OVERLAP = 'chunk_overlap' +FIELD_GENERATE_SUMMARY = 'generate_summary' +FIELD_EMBEDDING_DIMENSION = 'embedding_dimension' +FIELD_TASK_ID = 'task_id' + + +# ==================== API Response Fields ==================== + +# Common response fields +RESP_CONTENT = 'content' +RESP_RESPONSE = 'response' +RESP_TEXT = 'text' +RESP_CHOICES = 'choices' +RESP_MESSAGE = 'message' +RESP_ERROR = 'error' + +# Ingestor response fields +RESP_COLLECTIONS = 'collections' +RESP_TASK_ID = 'task_id' +RESP_STATE = 'state' +RESP_RESULT = 'result' +RESP_FAILED_DOCUMENTS = 'failed_documents' +RESP_VALIDATION_ERRORS = 'validation_errors' + + +# ==================== Timeouts (seconds) ==================== + +TIMEOUT_DEFAULT = 30 +TIMEOUT_UPLOAD = 600 +TIMEOUT_TASK_CHECK = 30 +TIMEOUT_MAX_TASK_WAIT = 300 + + +# ==================== Kafka Defaults ==================== + +KAFKA_DEFAULT_TOPIC = 'aidp-topic' +KAFKA_DEFAULT_CONSUMER_GROUP = 'nvingest-consumer-group' +KAFKA_DEFAULT_AUTO_OFFSET_RESET = 'earliest' +KAFKA_DEFAULT_MAX_POLL_RECORDS = 1 +KAFKA_DEFAULT_MAX_POLL_INTERVAL_MS = 600000 # 10 min +KAFKA_DEFAULT_SESSION_TIMEOUT_MS = 60000 # 60s +KAFKA_DEFAULT_HEARTBEAT_INTERVAL_MS = 20000 # 20s + + +# ==================== Collection Defaults ==================== + +COLLECTION_EMBEDDING_DIMENSION = 2048 +COLLECTION_CHUNK_SIZE = 512 +COLLECTION_CHUNK_OVERLAP = 150 + + +# ==================== 
Environment Variable Keys ==================== + +# Kafka +ENV_KAFKA_BOOTSTRAP_SERVERS = 'KAFKA_BOOTSTRAP_SERVERS' +ENV_KAFKA_TOPIC = 'KAFKA_TOPIC' +ENV_CONSUMER_GROUP = 'CONSUMER_GROUP' +ENV_KAFKA_AUTO_OFFSET_RESET = 'KAFKA_AUTO_OFFSET_RESET' +ENV_KAFKA_MAX_POLL_RECORDS = 'KAFKA_MAX_POLL_RECORDS' +ENV_KAFKA_MAX_POLL_INTERVAL_MS = 'KAFKA_MAX_POLL_INTERVAL_MS' +ENV_KAFKA_SESSION_TIMEOUT_MS = 'KAFKA_SESSION_TIMEOUT_MS' +ENV_KAFKA_HEARTBEAT_INTERVAL_MS = 'KAFKA_HEARTBEAT_INTERVAL_MS' + +# Service URLs +ENV_INGESTOR_SERVER_URL = 'INGESTOR_SERVER_URL' +ENV_INGESTOR_TIMEOUT = 'INGESTOR_TIMEOUT' + +# API Endpoints +ENV_API_INGESTOR_DOCUMENTS = 'API_INGESTOR_DOCUMENTS' +ENV_API_INGESTOR_COLLECTIONS = 'API_INGESTOR_COLLECTIONS' +ENV_API_INGESTOR_COLLECTION = 'API_INGESTOR_COLLECTION' +ENV_API_INGESTOR_STATUS = 'API_INGESTOR_STATUS' + +# MinIO +ENV_MINIO_ENDPOINT = 'MINIO_ENDPOINT' +ENV_MINIO_ACCESS_KEY = 'MINIO_ACCESS_KEY' +ENV_MINIO_SECRET_KEY = 'MINIO_SECRET_KEY' +ENV_MINIO_SECURE = 'MINIO_SECURE' +ENV_COLLECTION_NAME = 'COLLECTION_NAME' +ENV_MINIO_SOURCES = 'MINIO_SOURCES' + +# Feature Flags +ENV_ENABLE_IMAGE_PROCESSING = 'ENABLE_IMAGE_PROCESSING' +ENV_ENABLE_AUDIO_PROCESSING = 'ENABLE_AUDIO_PROCESSING' + +# Collection Settings +ENV_EMBEDDING_DIMENSION = 'EMBEDDING_DIMENSION' +ENV_CHUNK_SIZE = 'CHUNK_SIZE' +ENV_CHUNK_OVERLAP = 'CHUNK_OVERLAP' + +# Logging +ENV_LOG_LEVEL = 'LOG_LEVEL' +ENV_LOG_FORMAT = 'LOG_FORMAT' + +# History +ENV_HISTORY_FILE = 'HISTORY_FILE' + +# ==================== API Endpoint Defaults ==================== + +DEFAULT_API_INGESTOR_DOCUMENTS = '/v1/documents' +DEFAULT_API_INGESTOR_COLLECTIONS = '/v1/collections' +DEFAULT_API_INGESTOR_COLLECTION = '/v1/collection' +DEFAULT_API_INGESTOR_STATUS = '/v1/status' + + +# ==================== Logging Defaults ==================== + +DEFAULT_LOG_LEVEL = 'INFO' +DEFAULT_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + + +# ==================== History Defaults ==================== + 
+DEFAULT_HISTORY_FILE = '/tmp/ingestion_history.jsonl' + + +# ==================== MinIO Defaults ==================== + +DEFAULT_COLLECTION_NAME = 'multimodal_data' diff --git a/examples/rag_event_ingest/kafka_consumer/config/settings.py b/examples/rag_event_ingest/kafka_consumer/config/settings.py new file mode 100644 index 000000000..bbfe4824c --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/config/settings.py @@ -0,0 +1,135 @@ +# config/settings.py +"""Runtime settings loaded from environment variables.""" + +import os + +from .constants import ( + # Default values + KAFKA_DEFAULT_TOPIC, + KAFKA_DEFAULT_CONSUMER_GROUP, + KAFKA_DEFAULT_AUTO_OFFSET_RESET, + KAFKA_DEFAULT_MAX_POLL_RECORDS, + KAFKA_DEFAULT_MAX_POLL_INTERVAL_MS, + KAFKA_DEFAULT_SESSION_TIMEOUT_MS, + KAFKA_DEFAULT_HEARTBEAT_INTERVAL_MS, + TIMEOUT_UPLOAD, + COLLECTION_EMBEDDING_DIMENSION, + COLLECTION_CHUNK_SIZE, + COLLECTION_CHUNK_OVERLAP, + # API Endpoint defaults + DEFAULT_API_INGESTOR_DOCUMENTS, + DEFAULT_API_INGESTOR_COLLECTIONS, + DEFAULT_API_INGESTOR_COLLECTION, + DEFAULT_API_INGESTOR_STATUS, + # Logging defaults + DEFAULT_LOG_LEVEL, + DEFAULT_LOG_FORMAT, + # History defaults + DEFAULT_HISTORY_FILE, + # MinIO defaults + DEFAULT_COLLECTION_NAME, + # Environment variable keys + ENV_KAFKA_BOOTSTRAP_SERVERS, + ENV_KAFKA_TOPIC, + ENV_CONSUMER_GROUP, + ENV_KAFKA_AUTO_OFFSET_RESET, + ENV_KAFKA_MAX_POLL_RECORDS, + ENV_KAFKA_MAX_POLL_INTERVAL_MS, + ENV_KAFKA_SESSION_TIMEOUT_MS, + ENV_KAFKA_HEARTBEAT_INTERVAL_MS, + ENV_INGESTOR_SERVER_URL, + ENV_INGESTOR_TIMEOUT, + ENV_API_INGESTOR_DOCUMENTS, + ENV_API_INGESTOR_COLLECTIONS, + ENV_API_INGESTOR_COLLECTION, + ENV_API_INGESTOR_STATUS, + ENV_MINIO_ENDPOINT, + ENV_MINIO_ACCESS_KEY, + ENV_MINIO_SECRET_KEY, + ENV_MINIO_SECURE, + ENV_COLLECTION_NAME, + ENV_MINIO_SOURCES, + ENV_ENABLE_IMAGE_PROCESSING, + ENV_ENABLE_AUDIO_PROCESSING, + ENV_EMBEDDING_DIMENSION, + ENV_CHUNK_SIZE, + ENV_CHUNK_OVERLAP, + ENV_LOG_LEVEL, + ENV_LOG_FORMAT, + ENV_HISTORY_FILE, 
+) + + +# ==================== Helper Functions ==================== + +def _get_bool(key: str, default: bool = False) -> bool: + """Get boolean from environment variable.""" + return os.getenv(key, str(default)).lower() in ('true', '1', 'yes', 'on') + + +def _get_int(key: str, default: int) -> int: + """Get integer from environment variable.""" + try: + return int(os.getenv(key, str(default))) + except ValueError: + return default + + +# ==================== Kafka Settings ==================== + +KAFKA_BOOTSTRAP_SERVERS = os.getenv(ENV_KAFKA_BOOTSTRAP_SERVERS) # Required +KAFKA_CONSUMER_GROUP = os.getenv(ENV_CONSUMER_GROUP, KAFKA_DEFAULT_CONSUMER_GROUP) +KAFKA_TOPIC = os.getenv(ENV_KAFKA_TOPIC, KAFKA_DEFAULT_TOPIC) +KAFKA_AUTO_OFFSET_RESET = os.getenv(ENV_KAFKA_AUTO_OFFSET_RESET, KAFKA_DEFAULT_AUTO_OFFSET_RESET) +KAFKA_MAX_POLL_RECORDS = _get_int(ENV_KAFKA_MAX_POLL_RECORDS, KAFKA_DEFAULT_MAX_POLL_RECORDS) +KAFKA_MAX_POLL_INTERVAL_MS = _get_int(ENV_KAFKA_MAX_POLL_INTERVAL_MS, KAFKA_DEFAULT_MAX_POLL_INTERVAL_MS) +KAFKA_SESSION_TIMEOUT_MS = _get_int(ENV_KAFKA_SESSION_TIMEOUT_MS, KAFKA_DEFAULT_SESSION_TIMEOUT_MS) +KAFKA_HEARTBEAT_INTERVAL_MS = _get_int(ENV_KAFKA_HEARTBEAT_INTERVAL_MS, KAFKA_DEFAULT_HEARTBEAT_INTERVAL_MS) + + +# ==================== Service URLs ==================== + +INGESTOR_SERVER_URL = os.getenv(ENV_INGESTOR_SERVER_URL) # Required +INGESTOR_TIMEOUT = _get_int(ENV_INGESTOR_TIMEOUT, TIMEOUT_UPLOAD) + +# API Endpoints - Ingestor Server +API_INGESTOR_DOCUMENTS = os.getenv(ENV_API_INGESTOR_DOCUMENTS, DEFAULT_API_INGESTOR_DOCUMENTS) +API_INGESTOR_COLLECTIONS = os.getenv(ENV_API_INGESTOR_COLLECTIONS, DEFAULT_API_INGESTOR_COLLECTIONS) +API_INGESTOR_COLLECTION = os.getenv(ENV_API_INGESTOR_COLLECTION, DEFAULT_API_INGESTOR_COLLECTION) +API_INGESTOR_STATUS = os.getenv(ENV_API_INGESTOR_STATUS, DEFAULT_API_INGESTOR_STATUS) + + +# ==================== MinIO Settings ==================== + +MINIO_ENDPOINT = os.getenv(ENV_MINIO_ENDPOINT) # Required 
+MINIO_ACCESS_KEY = os.getenv(ENV_MINIO_ACCESS_KEY) # Required +MINIO_SECRET_KEY = os.getenv(ENV_MINIO_SECRET_KEY) # Required +MINIO_SECURE = _get_bool(ENV_MINIO_SECURE, False) +# Single collection for all buckets - matches RAG server's COLLECTION_NAME +MINIO_DEFAULT_COLLECTION = os.getenv(ENV_COLLECTION_NAME, DEFAULT_COLLECTION_NAME) +MINIO_SOURCES = os.getenv(ENV_MINIO_SOURCES) # JSON config for multi-source + + +# ==================== Feature Flags ==================== + +ENABLE_IMAGE_PROCESSING = _get_bool(ENV_ENABLE_IMAGE_PROCESSING, False) +ENABLE_AUDIO_PROCESSING = _get_bool(ENV_ENABLE_AUDIO_PROCESSING, False) + + +# ==================== Collection Settings ==================== + +EMBEDDING_DIMENSION = _get_int(ENV_EMBEDDING_DIMENSION, COLLECTION_EMBEDDING_DIMENSION) +CHUNK_SIZE = _get_int(ENV_CHUNK_SIZE, COLLECTION_CHUNK_SIZE) +CHUNK_OVERLAP = _get_int(ENV_CHUNK_OVERLAP, COLLECTION_CHUNK_OVERLAP) + + +# ==================== Logging Settings ==================== + +LOG_LEVEL = os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL) +LOG_FORMAT = os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT) + + +# ==================== History Settings ==================== + +HISTORY_FILE = os.getenv(ENV_HISTORY_FILE, DEFAULT_HISTORY_FILE) + diff --git a/examples/rag_event_ingest/kafka_consumer/consumer.py b/examples/rag_event_ingest/kafka_consumer/consumer.py new file mode 100644 index 000000000..87a5c0538 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/consumer.py @@ -0,0 +1,197 @@ +# consumer.py +"""Kafka consumer for MinIO S3 events.""" + +import json +import logging +from datetime import datetime +from typing import Dict, Optional +from kafka import KafkaConsumer + +import config.settings as cfg +from pathlib import Path +from config.constants import DEST_RAG, DEST_SKIP, STATUS_FAILED, KEY_DESTINATION, KEY_FILE_TYPE, KEY_REASON +from router import FileRouter +from models.events import S3Event, HandlerResult, IngestionRecord +from handlers.base import BaseHandler +from 
class KafkaEventConsumer:
    """Kafka consumer that routes MinIO events to handlers.

    Each Kafka message is parsed into an ``S3Event``. Delete events go to the
    indexer of the RAG handler; create events are routed by ``FileRouter`` to
    the handler registered for the resulting destination. Every event that
    parses successfully is appended to a JSON-lines history file.
    """

    def __init__(
        self,
        handlers: Dict[str, BaseHandler],
        storage: ObjectStorage,
        history_file: str = '/tmp/ingestion_history.jsonl'
    ):
        """Initialize Kafka consumer.

        Args:
            handlers: Handlers keyed by routing destination (e.g. ``DEST_RAG``).
            storage: Object storage; its ``get_collection_for_bucket`` is used
                to resolve the collection of each event.
            history_file: Path of the JSON-lines file receiving one record per
                processed event.

        Raises:
            ValueError: If ``KAFKA_BOOTSTRAP_SERVERS`` is unset or blank.
        """
        self.handlers = handlers
        self.storage = storage
        self.history_file = history_file
        self.router = FileRouter()

        # The setting is read from the environment without a default, so it is
        # None when unset. Fail with a clear message instead of an
        # AttributeError on `None.split`, and ignore blank list entries.
        bootstrap_servers = [
            server.strip()
            for server in (cfg.KAFKA_BOOTSTRAP_SERVERS or '').split(',')
            if server.strip()
        ]
        if not bootstrap_servers:
            raise ValueError(
                "KAFKA_BOOTSTRAP_SERVERS is not set; expected a comma-separated "
                "list of host:port entries"
            )

        logger.info(f"Connecting to Kafka: {cfg.KAFKA_BOOTSTRAP_SERVERS}")
        logger.info(f"Consumer group: {cfg.KAFKA_CONSUMER_GROUP}")

        self.kafka_consumer = KafkaConsumer(
            cfg.KAFKA_TOPIC,
            bootstrap_servers=bootstrap_servers,
            value_deserializer=lambda m: json.loads(m.decode('utf-8')),
            group_id=cfg.KAFKA_CONSUMER_GROUP,
            auto_offset_reset=cfg.KAFKA_AUTO_OFFSET_RESET,
            enable_auto_commit=True,
            max_poll_records=cfg.KAFKA_MAX_POLL_RECORDS,
            max_poll_interval_ms=cfg.KAFKA_MAX_POLL_INTERVAL_MS,
            session_timeout_ms=cfg.KAFKA_SESSION_TIMEOUT_MS,
            heartbeat_interval_ms=cfg.KAFKA_HEARTBEAT_INTERVAL_MS
        )

        logger.info("Kafka consumer initialized")
        logger.info(f"Registered handlers: {list(self.handlers.keys())}")

    def process_event(self, raw_event: dict) -> Optional[HandlerResult]:
        """Process a single MinIO S3 event.

        Args:
            raw_event: Deserialized Kafka message value.

        Returns:
            The handler result, a failed result when processing raised, or
            None when the message could not be parsed into an ``S3Event``.
        """
        start_time = datetime.now()
        event: Optional[S3Event] = None
        result: Optional[HandlerResult] = None

        try:
            logger.info(f"Received event: {json.dumps(raw_event, indent=2)}")

            event = S3Event.from_kafka_message(
                raw_event,
                collection_resolver=self.storage.get_collection_for_bucket
            )

            if not event:
                logger.warning("Invalid event format, skipping")
                return None

            logger.info(f"Processing: {event.bucket}/{event.key} ({event.size} bytes)")

            if event.event_type == 'delete':
                result = self._handle_delete(event)
            else:
                result = self._handle_create(event)

            return result

        except (json.JSONDecodeError, KeyError, ValueError) as e:
            logger.error(f"Invalid event data: {e}")
            result = HandlerResult.failed_result(str(e))
            return result

        except (IOError, OSError) as e:
            logger.error(f"Storage error: {e}")
            result = HandlerResult.failed_result(str(e))
            return result

        except Exception as e:
            # One bad event must not take down the long-running consumer loop.
            # Record it as failed (with traceback in the log) and move on.
            logger.exception(f"Unexpected error while processing event: {e}")
            result = HandlerResult.failed_result(str(e))
            return result

        finally:
            # Only parsed events are recorded; unparseable messages have no
            # bucket/key to attribute the record to.
            if event:
                self._save_record(event, result, start_time)

    def _handle_delete(self, event: S3Event) -> HandlerResult:
        """Handle S3 delete event.

        Delegates to the ``indexer`` attribute of the handler registered under
        ``DEST_RAG``; returns a failed result when no such indexer exists or
        when the indexer reports failure.
        """
        logger.info(f"🗑️ DELETE event for {event.key}")

        doc_handler = self.handlers.get(DEST_RAG)
        if not doc_handler or not hasattr(doc_handler, 'indexer'):
            return HandlerResult.failed_result("Delete failed - no indexer available")

        indexer = doc_handler.indexer
        success = indexer.delete(event.key, event.collection)

        if success:
            logger.info(f"✓ Deleted {event.key} from Milvus")
            return HandlerResult(success=True, status='DELETED')

        return HandlerResult.failed_result("Delete failed")

    def _handle_create(self, event: S3Event) -> HandlerResult:
        """Handle S3 create event.

        Routes the object key through ``FileRouter``. Skipped files yield a
        skipped result; otherwise the handler for the destination is used,
        falling back to the ``DEST_RAG`` handler when none is registered.
        """
        route_info = self.router.route(event.key)
        destination = route_info[KEY_DESTINATION]

        logger.info(f"📁 {route_info[KEY_FILE_TYPE]} → {destination}")

        if destination == DEST_SKIP:
            reason = route_info.get(KEY_REASON, 'Skipped by router')
            logger.info(f"⏭️ Skipping: {reason}")
            return HandlerResult.skipped_result(reason)

        handler = self.handlers.get(destination)
        if not handler:
            handler = self.handlers.get(DEST_RAG)

        if not handler:
            return HandlerResult.failed_result(f"No handler for {destination}")

        return handler.handle(event)

    def _save_record(self, event: S3Event, result: Optional[HandlerResult], start_time: datetime):
        """Save ingestion record to history file.

        Appends one JSON line describing the event and logs a one-line
        summary. A missing ``result`` is recorded with ``STATUS_FAILED``.
        Failure to write the history file is logged and otherwise ignored so
        that bookkeeping never blocks ingestion.
        """
        end_time = datetime.now()
        duration = (end_time - start_time).total_seconds()

        record = IngestionRecord(
            file_name=event.key,
            bucket=event.bucket,
            collection=event.collection,
            status=result.status if result else STATUS_FAILED,
            start_time=start_time,
            end_time=end_time,
            duration_seconds=duration,
            error_message=result.error_message if result else None,
            task_id=result.task_id if result else None
        )

        try:
            # Explicit UTF-8: object keys may contain non-ASCII characters and
            # the platform default encoding is not guaranteed to handle them.
            with open(self.history_file, 'a', encoding='utf-8') as f:
                f.write(json.dumps(record.to_dict()) + '\n')
        except (IOError, OSError) as e:
            logger.error(f"Failed to save history: {e}")

        status_emoji = '✓' if record.status in ['SUCCESS', 'DELETED', 'SKIPPED'] else '✗'
        logger.info(
            f"{status_emoji} SUMMARY: {event.key} | "
            f"Collection: {event.collection} | "
            f"Duration: {duration:.2f}s | "
            f"Status: {record.status}"
        )

    def run(self):
        """Main consumer loop.

        Processes messages until interrupted with Ctrl+C; the Kafka consumer
        is always closed on exit.
        """
        logger.info("Starting Kafka consumer loop...")
        logger.info(f"Subscribed topics: {self.kafka_consumer.subscription()}")
        logger.info("Waiting for messages...")

        try:
            message_count = 0
            for message in self._poll_messages():
                message_count += 1
                logger.info(
                    f"[{message_count}] Message from "
                    f"partition {message.partition}, offset {message.offset}"
                )
                self.process_event(message.value)

        except KeyboardInterrupt:
            logger.info("Shutting down...")
        finally:
            self.kafka_consumer.close()
            logger.info("Consumer closed")

    def _poll_messages(self):
        """Generator that yields messages from Kafka.

        Polls with a 5 second timeout forever. The batch size follows
        ``KAFKA_MAX_POLL_RECORDS`` (default 1) so the configured value is
        honoured instead of being overridden by a hard-coded 1.
        """
        while True:
            msg_pack = self.kafka_consumer.poll(
                timeout_ms=5000,
                max_records=cfg.KAFKA_MAX_POLL_RECORDS
            )

            if not msg_pack:
                logger.debug("No messages, continuing...")
                continue

            for messages in msg_pack.values():
                yield from messages
b/examples/rag_event_ingest/kafka_consumer/handlers/base.py new file mode 100644 index 000000000..6745f2e09 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/handlers/base.py @@ -0,0 +1,43 @@ +# handlers/base.py +"""Base handler abstract class.""" + +from abc import ABC, abstractmethod +import logging + +from models.events import S3Event, HandlerResult + +logger = logging.getLogger(__name__) + + +class BaseHandler(ABC): + """Abstract base class for file handlers.""" + + @property + @abstractmethod + def name(self) -> str: + """Handler name for logging.""" + pass + + @abstractmethod + def handle(self, event: S3Event) -> HandlerResult: + """Process an S3 event. + + Args: + event: S3 event to process + + Returns: + HandlerResult with success status and optional task_id + """ + pass + + def log_start(self, event: S3Event): + """Log handler start.""" + logger.info(f"[{self.name}] Processing {event.bucket}/{event.key}") + + def log_success(self, event: S3Event, result: HandlerResult): + """Log successful handling.""" + logger.info(f"[{self.name}] ✓ {event.key} → {result.status}") + + def log_failure(self, event: S3Event, result: HandlerResult): + """Log failed handling.""" + logger.error(f"[{self.name}] ✗ {event.key}: {result.error_message}") diff --git a/examples/rag_event_ingest/kafka_consumer/handlers/document.py b/examples/rag_event_ingest/kafka_consumer/handlers/document.py new file mode 100644 index 000000000..1df03946d --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/handlers/document.py @@ -0,0 +1,89 @@ +# handlers/document.py +"""Handler for document files (PDF, DOCX, TXT, etc.).""" + +import logging + +import requests + +from .base import BaseHandler +from models.events import S3Event, HandlerResult +from services.storage import ObjectStorage +from services.document_indexer import DocumentIndexer + +logger = logging.getLogger(__name__) + + +class DocumentHandler(BaseHandler): + """Handler for document files - sends to RAG ingestor.""" + + 
def __init__(self, storage: ObjectStorage, indexer: DocumentIndexer):
    """Initialize document handler.

    Args:
        storage: Object storage for file downloads
        indexer: Document indexer for RAG pipeline
    """
    self.storage = storage
    self.indexer = indexer

@property
def name(self) -> str:
    """Handler name used as the prefix of this handler's log lines."""
    return "DocumentHandler"

def handle(self, event: S3Event) -> HandlerResult:
    """Process document file.

    1. Delete existing entries (for updates)
    2. Download from MinIO
    3. Upload to ingestor
    4. Wait for completion

    Failures are reported through the returned ``HandlerResult`` rather
    than by raising: besides the network and OS-level errors handled
    explicitly, any other exception raised while processing the file is
    caught at this boundary and converted into a failed result.

    Args:
        event: S3 event with document info

    Returns:
        HandlerResult with task_id for status tracking
    """
    self.log_start(event)

    try:
        # Step 1: Delete existing entries (handles updates).
        # The outcome is not checked; processing continues either way.
        logger.info(f"🔄 Checking for existing entries of {event.key}...")
        self.indexer.delete(event.key, event.collection)

        # Step 2: Download from storage
        logger.info("📥 Downloading from storage...")
        file_data = self.storage.download(event.bucket, event.key)

        # Step 3: Upload to indexer
        logger.info("📤 Sending to indexer...")
        task_id = self.indexer.upload(
            file_data=file_data,
            filename=event.key,
            collection=event.collection
        )

        # A falsy task_id means the upload was not accepted.
        if not task_id:
            result = HandlerResult.failed_result("Indexer upload failed")
            self.log_failure(event, result)
            return result

        # Step 4: Wait for completion
        logger.info(f"⏳ Waiting for indexing (task_id: {task_id})...")
        success, message = self.indexer.check_status(task_id)

        if success:
            result = HandlerResult.success_result(task_id=task_id)
            self.log_success(event, result)
            return result

        result = HandlerResult.failed_result(message, task_id=task_id)
        self.log_failure(event, result)
        return result

    except requests.RequestException as e:
        logger.error(f"Network error processing document: {e}")
        return HandlerResult.failed_result(str(e))
    except (IOError, OSError) as e:
        logger.error(f"Storage error processing document: {e}")
        return HandlerResult.failed_result(str(e))
    except Exception as e:
        # Last-resort boundary: the storage client can raise errors that are
        # not OSError subclasses (presumably minio's S3Error for a missing
        # object - confirm against the pinned minio version). Record the
        # traceback and report a failure instead of letting it escape.
        # KeyboardInterrupt is a BaseException and still propagates.
        logger.exception(f"Unexpected error processing document {event.key}: {e}")
        return HandlerResult.failed_result(str(e))
diff --git a/examples/rag_event_ingest/kafka_consumer/main.py b/examples/rag_event_ingest/kafka_consumer/main.py new file mode 100644 index 000000000..df4384d4f --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/main.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +# main.py +"""Entry point for Kafka MinIO consumer.""" + +import logging + +import config.settings as cfg +from config.constants import DEST_RAG +from services import ObjectStorage, DocumentIndexer +from handlers import DocumentHandler +from consumer import KafkaEventConsumer + +logging.basicConfig( + level=getattr(logging, cfg.LOG_LEVEL, logging.INFO), + format=cfg.LOG_FORMAT +) +logger = logging.getLogger(__name__) + + +def main(): + """Initialize and run the Kafka consumer.""" + logger.info("=" * 60) + logger.info("Starting Kafka MinIO Consumer") + logger.info("=" * 60) + + # Initialize services + logger.info("Initializing services...") + storage = ObjectStorage() + indexer = DocumentIndexer(cfg.INGESTOR_SERVER_URL) + + # Initialize handlers + logger.info("Initializing handlers...") + handlers = { + DEST_RAG: DocumentHandler(storage, indexer), + } + + # Initialize consumer + logger.info("Initializing Kafka consumer...") + consumer = KafkaEventConsumer(handlers=handlers, storage=storage, history_file=cfg.HISTORY_FILE) + + # Run consumer loop + logger.info("Starting consumer loop...") + consumer.run() + + +if __name__ == '__main__': + main() diff --git a/examples/rag_event_ingest/kafka_consumer/models/__init__.py b/examples/rag_event_ingest/kafka_consumer/models/__init__.py new file mode 100644 index 000000000..2abce8a0d --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/models/__init__.py @@ -0,0 +1,4 @@ +# Models package +from .events import S3Event, HandlerResult, IngestionRecord + +__all__ = ['S3Event', 'HandlerResult', 'IngestionRecord'] diff --git a/examples/rag_event_ingest/kafka_consumer/models/events.py b/examples/rag_event_ingest/kafka_consumer/models/events.py new file mode 
100644 index 000000000..4baf7112f --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/models/events.py @@ -0,0 +1,138 @@ +# models/events.py +"""Data models for Kafka consumer events and results.""" + +from dataclasses import dataclass, field, fields +from datetime import datetime +from typing import Any, Callable, ClassVar, Dict, Optional +from urllib.parse import unquote_plus + +from config.constants import ( + STATUS_SUCCESS, + STATUS_FAILED, + STATUS_SKIPPED, + # S3 Event fields + EVENT_NAME, + EVENT_RECORDS, + EVENT_FIRST_RECORD_INDEX, + EVENT_S3, + EVENT_BUCKET, + EVENT_OBJECT, + EVENT_KEY, + EVENT_SIZE, + EVENT_ETAG, + EVENT_NAME_FIELD, + EVENT_PREFIX_CREATED, + EVENT_PREFIX_REMOVED, + EVENT_TYPE_CREATE, + EVENT_TYPE_DELETE, + # Record field names (for transformers) + FIELD_START_TIME, + FIELD_END_TIME, + FIELD_DURATION_SECONDS, +) + + +@dataclass +class S3Event: + """Represents a MinIO S3 event from Kafka.""" + bucket: str + key: str + size: int + etag: str + event_type: str + collection: str = '' + + @classmethod + def from_kafka_message( + cls, + event: Dict[str, Any], + collection_resolver: Callable[[str], str] + ) -> Optional['S3Event']: + """Parse S3 event from Kafka message. 
+ + Args: + event: Raw Kafka message value + collection_resolver: Function to resolve bucket -> collection name + """ + if EVENT_NAME not in event: + return None + + event_name = event[EVENT_NAME] + + if event_name.startswith(EVENT_PREFIX_CREATED): + event_type = EVENT_TYPE_CREATE + elif event_name.startswith(EVENT_PREFIX_REMOVED): + event_type = EVENT_TYPE_DELETE + else: + return None + + records = event.get(EVENT_RECORDS, []) + if not records: + return None + + record = records[EVENT_FIRST_RECORD_INDEX] + s3_data = record[EVENT_S3] + bucket = s3_data[EVENT_BUCKET][EVENT_NAME_FIELD] + obj_data = s3_data[EVENT_OBJECT] + key = unquote_plus(obj_data[EVENT_KEY]) + size = obj_data.get(EVENT_SIZE, 0) + etag = obj_data.get(EVENT_ETAG, '') + + return cls( + bucket=bucket, + key=key, + size=size, + etag=etag, + event_type=event_type, + collection=collection_resolver(bucket) + ) + + +@dataclass +class HandlerResult: + """Result from a handler execution.""" + success: bool + status: str # SUCCESS, FAILED, SKIPPED, DELETED + error_message: Optional[str] = None + task_id: Optional[str] = None # For RAG status tracking + + @classmethod + def success_result(cls, task_id: Optional[str] = None) -> 'HandlerResult': + return cls(success=True, status=STATUS_SUCCESS, task_id=task_id) + + @classmethod + def failed_result(cls, error: str, task_id: Optional[str] = None) -> 'HandlerResult': + return cls(success=False, status=STATUS_FAILED, error_message=error, task_id=task_id) + + @classmethod + def skipped_result(cls, reason: str) -> 'HandlerResult': + return cls(success=True, status=STATUS_SKIPPED, error_message=reason) + + +@dataclass +class IngestionRecord: + """Record of an ingestion operation for history tracking.""" + file_name: str + bucket: str + collection: str + status: str + start_time: datetime + end_time: datetime = field(default_factory=datetime.now) + duration_seconds: float = 0.0 + error_message: Optional[str] = None + task_id: Optional[str] = None + + _TRANSFORMERS: 
ClassVar[Dict[str, Callable]] = { + FIELD_START_TIME: lambda v: v.isoformat(), + FIELD_END_TIME: lambda v: v.isoformat(), + FIELD_DURATION_SECONDS: lambda v: round(v, 2), + } + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization.""" + result = {} + for f in fields(self): + value = getattr(self, f.name) + transform = self._TRANSFORMERS.get(f.name) + result[f.name] = transform(value) if transform else value + return result diff --git a/examples/rag_event_ingest/kafka_consumer/requirements.txt b/examples/rag_event_ingest/kafka_consumer/requirements.txt new file mode 100644 index 000000000..3f3818161 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/requirements.txt @@ -0,0 +1,4 @@ +kafka-python==2.0.2 +minio==7.2.0 +requests==2.31.0 +requests-toolbelt==1.0.0 diff --git a/examples/rag_event_ingest/kafka_consumer/router.py b/examples/rag_event_ingest/kafka_consumer/router.py new file mode 100644 index 000000000..41f5b8f23 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/router.py @@ -0,0 +1,91 @@ +# router.py +"""File routing module for MinIO event processing.""" + +import logging +from pathlib import Path +from typing import Dict, Any, List, Set, Union + +from config.constants import ( + DOCUMENT_EXTENSIONS, + IMAGE_EXTENSIONS, + AUDIO_EXTENSIONS, + SKIP_EXTENSIONS, + DEST_RAG, + DEST_SKIP, + KEY_DESTINATION, + KEY_FILE_TYPE, + KEY_EXTENSION, + KEY_REASON, + FILE_TYPE_DOCUMENT, + FILE_TYPE_IMAGE, + FILE_TYPE_AUDIO, + FILE_TYPE_SKIP, + FILE_TYPE_UNKNOWN, + CFG_DOCUMENT_EXTENSIONS, + CFG_IMAGE_EXTENSIONS, + CFG_AUDIO_EXTENSIONS, + CFG_SKIP_EXTENSIONS, + CFG_ENABLE_IMAGE_PROCESSING, + CFG_ENABLE_AUDIO_PROCESSING, +) + +logger = logging.getLogger(__name__) + + +class FileRouter: + """Routes files to appropriate processing services based on file type.""" + + def __init__(self, config: Union[Dict[str, Any], Any] = None): + """Initialize router with optional config overrides.""" + if config is None: + config = {} + 
elif hasattr(config, '__dataclass_fields__'): + config = { + CFG_DOCUMENT_EXTENSIONS: config.document_extensions, + CFG_IMAGE_EXTENSIONS: config.image_extensions, + CFG_AUDIO_EXTENSIONS: config.audio_extensions, + CFG_SKIP_EXTENSIONS: config.skip_extensions, + CFG_ENABLE_IMAGE_PROCESSING: config.enable_image_processing, + CFG_ENABLE_AUDIO_PROCESSING: config.enable_audio_processing, + } + + self.config = config + self.document_extensions = self._to_set(config.get(CFG_DOCUMENT_EXTENSIONS, DOCUMENT_EXTENSIONS)) + self.image_extensions = self._to_set(config.get(CFG_IMAGE_EXTENSIONS, IMAGE_EXTENSIONS)) + self.audio_extensions = self._to_set(config.get(CFG_AUDIO_EXTENSIONS, AUDIO_EXTENSIONS)) + self.skip_extensions = self._to_set(config.get(CFG_SKIP_EXTENSIONS, SKIP_EXTENSIONS)) + self.enable_image_processing = config.get(CFG_ENABLE_IMAGE_PROCESSING, False) + self.enable_audio_processing = config.get(CFG_ENABLE_AUDIO_PROCESSING, False) + + logger.info(f"FileRouter initialized - Documents: {len(self.document_extensions)} types") + + @staticmethod + def _to_set(value: Union[List, Set, None]) -> Set[str]: + if value is None: + return set() + return set(value) if isinstance(value, (list, tuple)) else value + + def route(self, filename: str) -> dict: + """Determine routing destination for a file.""" + ext = Path(filename).suffix.lower() + + if ext in self.skip_extensions: + return {KEY_DESTINATION: DEST_SKIP, KEY_FILE_TYPE: FILE_TYPE_SKIP, KEY_EXTENSION: ext, KEY_REASON: 'File extension in skip list'} + + if ext in self.document_extensions: + return {KEY_DESTINATION: DEST_RAG, KEY_FILE_TYPE: FILE_TYPE_DOCUMENT, KEY_EXTENSION: ext} + + if ext in self.image_extensions: + if self.enable_image_processing: + return {KEY_DESTINATION: DEST_RAG, KEY_FILE_TYPE: FILE_TYPE_IMAGE, KEY_EXTENSION: ext} + return {KEY_DESTINATION: DEST_SKIP, KEY_FILE_TYPE: FILE_TYPE_IMAGE, KEY_EXTENSION: ext, KEY_REASON: 'Image processing not enabled'} + + if ext in self.audio_extensions: + if 
self.enable_audio_processing: + return {KEY_DESTINATION: DEST_RAG, KEY_FILE_TYPE: FILE_TYPE_AUDIO, KEY_EXTENSION: ext} + return {KEY_DESTINATION: DEST_SKIP, KEY_FILE_TYPE: FILE_TYPE_AUDIO, KEY_EXTENSION: ext, KEY_REASON: 'Audio processing not enabled'} + + return {KEY_DESTINATION: DEST_RAG, KEY_FILE_TYPE: FILE_TYPE_UNKNOWN, KEY_EXTENSION: ext, KEY_REASON: 'Unknown extension, attempting RAG ingestion'} + + def is_document(self, filename: str) -> bool: + return Path(filename).suffix.lower() in self.document_extensions diff --git a/examples/rag_event_ingest/kafka_consumer/services/__init__.py b/examples/rag_event_ingest/kafka_consumer/services/__init__.py new file mode 100644 index 000000000..db2c0e347 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/services/__init__.py @@ -0,0 +1,7 @@ +# services/__init__.py +"""External service clients.""" + +from .storage import ObjectStorage +from .document_indexer import DocumentIndexer + +__all__ = ['ObjectStorage', 'DocumentIndexer'] diff --git a/examples/rag_event_ingest/kafka_consumer/services/document_indexer.py b/examples/rag_event_ingest/kafka_consumer/services/document_indexer.py new file mode 100644 index 000000000..ac60d41a2 --- /dev/null +++ b/examples/rag_event_ingest/kafka_consumer/services/document_indexer.py @@ -0,0 +1,227 @@ +# services/document_indexer.py +"""Document indexing service for RAG pipeline.""" + +import json +import logging +import time +from pathlib import Path +from typing import Optional, Tuple +import requests + +from config import ( + API_INGESTOR_DOCUMENTS, + API_INGESTOR_COLLECTIONS, + API_INGESTOR_COLLECTION, + API_INGESTOR_STATUS, + STATUS_PENDING, + STATUS_PROCESSING, + STATUS_FINISHED, + STATUS_FAILED, + TIMEOUT_DEFAULT, + TIMEOUT_MAX_TASK_WAIT, + COLLECTION_EMBEDDING_DIMENSION, + COLLECTION_CHUNK_SIZE, + COLLECTION_CHUNK_OVERLAP, + CONTENT_TYPE_MAP, + DEFAULT_CONTENT_TYPE, + FIELD_COLLECTION_NAME, + FIELD_BLOCKING, + FIELD_SPLIT_OPTIONS, + FIELD_CHUNK_SIZE, + 
FIELD_CHUNK_OVERLAP, + FIELD_GENERATE_SUMMARY, + FIELD_EMBEDDING_DIMENSION, + FIELD_TASK_ID, + RESP_COLLECTIONS, + RESP_TASK_ID, + RESP_STATE, + RESP_RESULT, + RESP_FAILED_DOCUMENTS, + RESP_VALIDATION_ERRORS, + RESP_MESSAGE, + RESP_ERROR, +) + +logger = logging.getLogger(__name__) + + +class DocumentIndexer: + """Indexes documents in vector store for RAG retrieval.""" + + def __init__(self, base_url: str, timeout: int = 600): + """Initialize document indexer.""" + self.base_url = base_url.rstrip('/') + self.timeout = timeout + self._created_collections: set = set() + + logger.info(f"DocumentIndexer initialized: {self.base_url}") + + def ensure_collection_exists(self, collection_name: str) -> bool: + """Create collection if it doesn't exist.""" + if collection_name in self._created_collections: + return True + + # Check if collection exists + try: + response = requests.get( + f'{self.base_url}{API_INGESTOR_COLLECTIONS}', + timeout=TIMEOUT_DEFAULT + ) + except requests.RequestException as e: + logger.error(f"Error checking collections: {e}") + return False + + if response.status_code == 200: + collections = response.json().get(RESP_COLLECTIONS, []) + if collection_name in collections: + logger.info(f"Collection '{collection_name}' already exists") + self._created_collections.add(collection_name) + return True + + # Create collection + logger.info(f"Creating collection '{collection_name}'...") + try: + create_response = requests.post( + f'{self.base_url}{API_INGESTOR_COLLECTION}', + json={ + FIELD_COLLECTION_NAME: collection_name, + FIELD_EMBEDDING_DIMENSION: COLLECTION_EMBEDDING_DIMENSION, + 'metadata_schema': [] + }, + headers={'Content-Type': 'application/json'}, + timeout=TIMEOUT_DEFAULT + ) + except requests.RequestException as e: + logger.error(f"Error creating collection: {e}") + return False + + if create_response.status_code in [200, 201]: + logger.info(f"✓ Collection '{collection_name}' created") + self._created_collections.add(collection_name) + return True 
+ + logger.error(f"Failed to create collection: {create_response.status_code}") + return False + + def upload( + self, + file_data: bytes, + filename: str, + collection: str, + chunk_size: int = COLLECTION_CHUNK_SIZE, + chunk_overlap: int = COLLECTION_CHUNK_OVERLAP + ) -> Optional[str]: + """Upload document to ingestor server.""" + if not self.ensure_collection_exists(collection): + logger.error("Failed to ensure collection exists") + return None + + content_type = self._get_content_type(filename) + files = {'documents': (filename, file_data, content_type)} + + data_config = { + FIELD_COLLECTION_NAME: collection, + FIELD_BLOCKING: False, + FIELD_SPLIT_OPTIONS: { + FIELD_CHUNK_SIZE: chunk_size, + FIELD_CHUNK_OVERLAP: chunk_overlap + }, + FIELD_GENERATE_SUMMARY: False + } + + logger.info(f"Uploading to collection: {collection}") + try: + response = requests.post( + f'{self.base_url}{API_INGESTOR_DOCUMENTS}', + files=files, + data={'data': json.dumps(data_config)}, + timeout=self.timeout + ) + except requests.RequestException as e: + logger.error(f"Error uploading document: {e}") + return None + + if response.status_code in [200, 201, 202]: + result = response.json() + task_id = result.get(RESP_TASK_ID) + if task_id: + logger.info(f"✓ File uploaded, task_id: {task_id}") + return task_id + logger.error("No task_id in response") + return None + + logger.error(f"Upload failed: {response.status_code} - {response.text}") + return None + + def check_status(self, task_id: str, max_wait: int = TIMEOUT_MAX_TASK_WAIT) -> Tuple[bool, str]: + """Check task status and wait for completion.""" + start_time = time.time() + + while time.time() - start_time < max_wait: + try: + response = requests.get( + f'{self.base_url}{API_INGESTOR_STATUS}', + params={FIELD_TASK_ID: task_id}, + timeout=TIMEOUT_DEFAULT + ) + except requests.RequestException as e: + return False, str(e) + + if response.status_code != 200: + return False, f"Status check failed: {response.status_code}" + + result = 
response.json() + state = result.get(RESP_STATE, 'UNKNOWN') + + if state == STATUS_FAILED: + return False, result.get(RESP_ERROR, 'Unknown error') + + if state == STATUS_FINISHED: + return self._parse_finished_result(result) + + if state in [STATUS_PENDING, STATUS_PROCESSING]: + elapsed = int(time.time() - start_time) + if elapsed % 5 == 0: + logger.info(f"Task {task_id}: {state} ({elapsed}s)") + + time.sleep(1) + + return False, f"Timeout after {max_wait}s" + + def _parse_finished_result(self, result: dict) -> Tuple[bool, str]: + """Parse result from a finished task.""" + task_result = result.get(RESP_RESULT, {}) + failed_docs = task_result.get(RESP_FAILED_DOCUMENTS, []) + validation_errors = task_result.get(RESP_VALIDATION_ERRORS, []) + + if failed_docs or validation_errors: + return False, f"Failed: {failed_docs}, Errors: {validation_errors}" + return True, task_result.get(RESP_MESSAGE, 'Completed') + + def delete(self, filename: str, collection: str) -> bool: + """Delete document from collection.""" + logger.info(f"Deleting '{filename}' from '{collection}'") + + try: + response = requests.delete( + f'{self.base_url}{API_INGESTOR_DOCUMENTS}', + params={FIELD_COLLECTION_NAME: collection}, + json=[filename], + headers={'Content-Type': 'application/json'}, + timeout=TIMEOUT_DEFAULT + ) + except requests.RequestException as e: + logger.error(f"Error deleting document: {e}") + return False + + if response.status_code in [200, 201, 204]: + logger.info(f"Deleted '{filename}'") + return True + + logger.error(f"Delete failed: {response.status_code}") + return False + + def _get_content_type(self, filename: str) -> str: + """Get content type from filename.""" + ext = Path(filename).suffix.lower() + return CONTENT_TYPE_MAP.get(ext, DEFAULT_CONTENT_TYPE) diff --git a/examples/rag_event_ingest/kafka_consumer/services/storage.py b/examples/rag_event_ingest/kafka_consumer/services/storage.py new file mode 100644 index 000000000..8f50a1f7b --- /dev/null +++ 
b/examples/rag_event_ingest/kafka_consumer/services/storage.py @@ -0,0 +1,195 @@ +# services/storage.py +"""S3-compatible object storage service.""" + +import io +import json +import logging +from abc import ABC, abstractmethod +from typing import Dict, Optional + +from minio import Minio +from minio.error import S3Error + +from config import ( + MINIO_ENDPOINT, + MINIO_ACCESS_KEY, + MINIO_SECRET_KEY, + MINIO_SECURE, + MINIO_DEFAULT_COLLECTION, + MINIO_SOURCES, + CFG_ENDPOINT, + CFG_ACCESS, + CFG_SECRET, + CFG_SECURE, + CFG_COLLECTION, + CFG_BUCKETS, +) + +logger = logging.getLogger(__name__) + + +# ============================================================================= +# Abstract Interface +# ============================================================================= + +class StorageBackend(ABC): + """Abstract interface for object storage operations. + + Implement this to add new backends (Azure Blob, GCS, etc.) + """ + + @abstractmethod + def download(self, bucket: str, key: str) -> bytes: + """Download file from storage.""" + pass + + @abstractmethod + def upload(self, bucket: str, key: str, data: bytes, content_type: Optional[str] = None) -> None: + """Upload file to storage.""" + pass + + @abstractmethod + def delete(self, bucket: str, key: str) -> None: + """Delete file from storage.""" + pass + + @abstractmethod + def exists(self, bucket: str, key: str) -> bool: + """Check if file exists.""" + pass + + +# ============================================================================= +# S3 Implementation +# ============================================================================= + +class S3Backend(StorageBackend): + """S3-compatible storage (MinIO, AWS S3, Wasabi, etc.).""" + + def __init__(self, client: Minio): + self._client = client + + @classmethod + def create( + cls, + endpoint: str, + access_key: str, + secret_key: str, + secure: bool = False, + ) -> 'S3Backend': + """Factory method to create S3 backend.""" + client = Minio(endpoint, 
access_key=access_key, secret_key=secret_key, secure=secure) + logger.info(f"Created S3 client: {endpoint}") + return cls(client) + + def download(self, bucket: str, key: str) -> bytes: + response = self._client.get_object(bucket, key) + try: + data = response.read() + finally: + response.close() + response.release_conn() + logger.info(f"Downloaded {bucket}/{key} ({len(data)} bytes)") + return data + + def upload(self, bucket: str, key: str, data: bytes, content_type: Optional[str] = None) -> None: + self._client.put_object( + bucket, key, io.BytesIO(data), + length=len(data), + content_type=content_type or 'application/octet-stream' + ) + logger.info(f"Uploaded {bucket}/{key}") + + def delete(self, bucket: str, key: str) -> None: + self._client.remove_object(bucket, key) + logger.info(f"Deleted {bucket}/{key}") + + def exists(self, bucket: str, key: str) -> bool: + try: + self._client.stat_object(bucket, key) + return True + except S3Error: + return False + + +# ============================================================================= +# Object Storage (Factory + Bucket Mapping) +# ============================================================================= + +class ObjectStorage: + """Object storage with bucket-to-collection mapping. + + Handles single or multiple S3 sources via configuration. 
+ """ + + def __init__(self): + self._backends: Dict[str, StorageBackend] = {} + self._bucket_to_backend: Dict[str, str] = {} + self._bucket_to_collection: Dict[str, str] = {} + self._default_collection = MINIO_DEFAULT_COLLECTION + self._configure() + + def _configure(self): + if MINIO_SOURCES: + self._configure_multi_source(MINIO_SOURCES) + else: + self._configure_single_source() + + def _configure_single_source(self): + logger.info(f"Single S3 mode: {MINIO_ENDPOINT}") + self._backends['default'] = S3Backend.create( + MINIO_ENDPOINT, MINIO_ACCESS_KEY, MINIO_SECRET_KEY, MINIO_SECURE + ) + + def _configure_multi_source(self, sources_json: str): + config = json.loads(sources_json) + for name, src in config.items(): + self._configure_source(name, src) + + def _configure_source(self, name: str, src: dict): + """Configure a single S3 source and register its buckets.""" + logger.info(f"Configuring S3 source '{name}': {src[CFG_ENDPOINT]}") + + self._backends[name] = S3Backend.create( + src[CFG_ENDPOINT], + src.get(CFG_ACCESS, MINIO_ACCESS_KEY), + src.get(CFG_SECRET, MINIO_SECRET_KEY), + src.get(CFG_SECURE, False) + ) + + collection = src.get(CFG_COLLECTION, name.replace('-', '_')) + self._register_buckets(name, src.get(CFG_BUCKETS, []), collection) + + def _register_buckets(self, backend_name: str, buckets: list, collection: str): + """Register bucket-to-backend and bucket-to-collection mappings.""" + for bucket in buckets: + self._bucket_to_backend[bucket] = backend_name + self._bucket_to_collection[bucket] = collection + logger.info(f" {bucket} → {collection}") + + def _get_backend(self, bucket: str) -> StorageBackend: + if bucket in self._bucket_to_backend: + return self._backends[self._bucket_to_backend[bucket]] + return next(iter(self._backends.values())) + + def download(self, bucket: str, key: str) -> bytes: + return self._get_backend(bucket).download(bucket, key) + + def get_collection_for_bucket(self, bucket: str) -> str: + """Get collection name for bucket. 
+ + Priority: + 1. Explicit mapping from MINIO_SOURCES config + 2. Default collection from COLLECTION_NAME env var + 3. Fallback: bucket name with hyphens → underscores + """ + # Check explicit mapping first + if bucket in self._bucket_to_collection: + return self._bucket_to_collection[bucket] + + # Use default collection if configured + if self._default_collection: + return self._default_collection + + # Fallback to bucket name conversion + return bucket.replace('-', '_') diff --git a/examples/rag_react_agent/pyproject.toml b/examples/rag_react_agent/pyproject.toml index fcebbcb3a..c4967a58a 100644 --- a/examples/rag_react_agent/pyproject.toml +++ b/examples/rag_react_agent/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ # Keep package version constraints as open as possible to avoid conflicts with other packages. Always define a minimum # version when adding a new package. If unsure, default to using `~=` instead of `==`. Does not apply to nvidia-nat packages. # Keep sorted!!! - "langgraph>=1.0.7", # Required for react_agent workflow + "langgraph>=1.0.8", # Required for react_agent workflow "langchain_classic", "nvidia-nat>=1.5.0a0,<2.0", # Allow pre-release versions "nvidia-nat-langchain>=1.5.0a0,<2.0", # Allow pre-release versions diff --git a/examples/rag_react_agent/uv.lock b/examples/rag_react_agent/uv.lock index 0554ef787..16af3b48d 100644 --- a/examples/rag_react_agent/uv.lock +++ b/examples/rag_react_agent/uv.lock @@ -324,6 +324,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/3a/577b549de0cc09d95f11087ee63c739bba856cd3952697eec4c4bb91350a/bleach-6.3.0-py3-none-any.whl", hash = "sha256:fe10ec77c93ddf3d13a73b035abaac7a9f5e436513864ccdad516693213c65d6", size = 164437, upload-time = "2025-10-27T17:57:37.538Z" }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, +] + [[package]] name = "boto3" version = "1.40.61" @@ -597,21 +606,20 @@ name = "datasets" version = "4.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "dill" }, - { name = "filelock" }, - { name = "fsspec", extra = ["http"] }, - { name = "httpx" }, - { name = "huggingface-hub" }, - { name = "multiprocess" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "pyarrow" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "tqdm" }, - { name = "xxhash" }, + { name = "dill", marker = "python_full_version >= '3.12'" }, + { name = "filelock", marker = "python_full_version >= '3.12'" }, + { name = "fsspec", extra = ["http"], marker = "python_full_version >= '3.12'" }, + { name = "httpx", marker = "python_full_version >= '3.12'" }, + { name = "huggingface-hub", marker = "python_full_version >= '3.12'" }, + { name = "multiprocess", marker = "python_full_version >= '3.12'" }, + { name = "numpy", marker = "python_full_version >= '3.12'" }, + { name = "packaging", marker = "python_full_version >= '3.12'" }, + { name = "pandas", marker = "python_full_version >= '3.12'" }, + { name = "pyarrow", marker 
= "python_full_version >= '3.12'" }, + { name = "pyyaml", marker = "python_full_version >= '3.12'" }, + { name = "requests", marker = "python_full_version >= '3.12'" }, + { name = "tqdm", marker = "python_full_version >= '3.12'" }, + { name = "xxhash", marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/55/bf/bb927bde63d649296c83e883171ae77074717c1b80fe2868b328bd0dbcbb/datasets-4.5.0.tar.gz", hash = "sha256:00c698ce1c2452e646cc5fad47fef39d3fe78dd650a8a6eb205bb45eb63cd500", size = 588384, upload-time = "2026-01-14T18:27:54.297Z" } wheels = [ @@ -736,6 +744,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970, upload-time = "2022-11-02T17:34:01.425Z" }, ] +[[package]] +name = "flask" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blinker" }, + { name = "click" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/26/00/35d85dcce6c57fdc871f3867d465d780f302a175ea360f62533f12b27e2b/flask-3.1.3.tar.gz", hash = "sha256:0ef0e52b8a9cd932855379197dd8f94047b359ca0a78695144304cb45f87c9eb", size = 759004, upload-time = "2026-02-19T05:00:57.678Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" }, +] + [[package]] name = "flatbuffers" version = "25.12.19" @@ -828,7 +853,7 @@ wheels = [ [package.optional-dependencies] http = [ - { name = "aiohttp" }, + { name = "aiohttp", marker = "python_full_version >= 
'3.12'" }, ] [[package]] @@ -1078,6 +1103,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320, upload-time = "2024-10-08T23:04:09.501Z" }, ] +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1235,18 +1269,17 @@ wheels = [ [[package]] name = "langchain-aws" -version = "1.0.0" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3" }, { name = "langchain-core" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "pydantic" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/90/52/7e57fb7fc34c386625f66f0ab31da9cf2788b03ef15ae78ccd4c627b30cf/langchain_aws-1.0.0.tar.gz", hash = "sha256:597342bda0e7384e13590e9ab69c872ddcfbbf07d81ac6bb0f8a67970252212e", size = 214146, upload-time = "2025-10-17T19:06:49.001Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/52/1d/bb306951b1c394b7a27effb8eb6c9ee65dd77fcc4be7c20f76e3299a9e1e/langchain_aws-1.1.0.tar.gz", hash = "sha256:1e2f8570328eae4907c3cf7e900dc68d8034ddc865d9dc96823c9f9d8cccb901", size = 393899, upload-time = "2025-11-24T14:35:24.216Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/5d/5b3c07780a8eb4b916ffe504893896f87f318924c86dcbeb89562baa2d20/langchain_aws-1.0.0-py3-none-any.whl", hash = "sha256:68f6965b5030d0779b02e731ce1c910a5f4518bfe0e2ae82999a5342bc46dbd5", size = 150400, upload-time = "2025-10-17T19:06:47.926Z" }, + { url = "https://files.pythonhosted.org/packages/26/33/91b8d2a7570657b371382b45054142c54165a51706990a5c1b4cc40c0e9a/langchain_aws-1.1.0-py3-none-any.whl", hash = "sha256:8ec074615b42839e035354063717374c32c63f5028ef5221ba073fd5f3ef5e37", size = 152432, upload-time = "2025-11-24T14:35:23.004Z" }, ] [[package]] @@ -1278,8 +1311,7 @@ dependencies = [ { name = "langchain-classic" }, { name = "langchain-core" }, { name = "langsmith" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "pydantic-settings" }, { name = "pyyaml" }, { name = "requests" }, @@ -1293,7 +1325,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.2.7" +version = "1.2.17" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -1305,9 +1337,23 @@ dependencies = [ { name = "typing-extensions" }, { name = "uuid-utils" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a2/0e/664d8d81b3493e09cbab72448d2f9d693d1fa5aa2bcc488602203a9b6da0/langchain_core-1.2.7.tar.gz", hash = "sha256:e1460639f96c352b4a41c375f25aeb8d16ffc1769499fb1c20503aad59305ced", size = 837039, upload-time = "2026-01-09T17:44:25.505Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1d/93/36226f593df52b871fc24d494c274f3a6b2ac76763a2806e7d35611634a1/langchain_core-1.2.17.tar.gz", hash = "sha256:54aa267f3311e347fb2e50951fe08e53761cebfb999ab80e6748d70525bbe872", size = 836130, upload-time = "2026-03-02T22:47:55.846Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/6f/34a9fba14d191a67f7e2ee3dbce3e9b86d2fa7310e2c7f2c713583481bd2/langchain_core-1.2.7-py3-none-any.whl", hash = "sha256:452f4fef7a3d883357b22600788d37e3d8854ef29da345b7ac7099f33c31828b", size = 490232, upload-time = "2026-01-09T17:44:24.236Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/073f33ab383a62908eca7ea699586dfea280e77182176e33199c80ddf22a/langchain_core-1.2.17-py3-none-any.whl", hash = "sha256:bf6bd6ce503874e9c2da1669a69383e967c3de1ea808921d19a9a6bff1a9fbbe", size = 502727, upload-time = "2026-03-02T22:47:54.537Z" }, +] + +[[package]] +name = "langchain-huggingface" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "langchain-core" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/5b/4910551367de5c6ec246616fcc0ddb0bc6f9e5d353d4a22dcb5ab1f87e60/langchain_huggingface-1.2.1.tar.gz", hash = "sha256:33d52a30a56775380c6b4321b78136a410eb079132a80fe7120ddd4b954b4efa", size = 253106, upload-time = "2026-03-02T18:44:39.163Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/90/a1440bfa467a6dd9025ad80f3c239554de28aec49dacfb369fda92871556/langchain_huggingface-1.2.1-py3-none-any.whl", hash = "sha256:0930c216a457d2c8dc7b39a756c39c567f1d88593bfee2c3441f3ae718435f0f", size = 30924, upload-time = "2026-03-02T18:44:37.745Z" }, ] [[package]] @@ -1338,16 +1384,17 @@ wheels = [ [[package]] name = "langchain-nvidia-ai-endpoints" -version = "1.0.3" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "filetype" }, { name = 
"langchain-core" }, + { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/9e/30814da280f7a79b168f83180f6a0396c166f86a566e56bb9877bf562611/langchain_nvidia_ai_endpoints-1.0.3.tar.gz", hash = "sha256:11c48fd24e4a9d4c86c65bcef943400f4e709497c93254c7dc97c43f68c2be89", size = 46526, upload-time = "2026-01-28T22:04:33.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/47/4b/e417af1b2b7f861f37e26bf4fa4b05cda4052002e3f84a966f0735baf94f/langchain_nvidia_ai_endpoints-1.2.0.tar.gz", hash = "sha256:4bd63b812707ea348a86539001aa9a89b3cba3ee56ade7379247a955e4bfd3eb", size = 53851, upload-time = "2026-03-10T17:55:08.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/04/c83f61106a245b74de11c1e075c1cc1e70462ece1dd9fc0584ad992a776d/langchain_nvidia_ai_endpoints-1.0.3-py3-none-any.whl", hash = "sha256:e5f170ad0a335637298bb90fb3df119793821e316355f61ab82f0106913eebbf", size = 50130, upload-time = "2026-01-28T22:04:33.065Z" }, + { url = "https://files.pythonhosted.org/packages/66/e4/186f1a99e4d30bd91c8438d024dc73a71c8f7e0657c7acb6e79658aa19cf/langchain_nvidia_ai_endpoints-1.2.0-py3-none-any.whl", hash = "sha256:c8e075d5b3d31216374af0cfa9e690ab28ada3ebbde34dd6d36fe16a26d883cc", size = 58269, upload-time = "2026-03-10T17:55:06.339Z" }, ] [[package]] @@ -1393,7 +1440,7 @@ wheels = [ [[package]] name = "langgraph" -version = "1.0.7" +version = "1.0.10" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, @@ -1403,9 +1450,9 @@ dependencies = [ { name = "pydantic" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/5b/f72655717c04e33d3b62f21b166dc063d192b53980e9e3be0e2a117f1c9f/langgraph-1.0.7.tar.gz", hash = "sha256:0cfdfee51e6e8cfe503ecc7367c73933437c505b03fa10a85c710975c8182d9a", size = 497098, upload-time = "2026-01-22T16:57:47.303Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/55/92/14df6fefba28c10caf1cb05aa5b8c7bf005838fe32a86d903b6c7cc4018d/langgraph-1.0.10.tar.gz", hash = "sha256:73bd10ee14a8020f31ef07e9cd4c1a70c35cc07b9c2b9cd637509a10d9d51e29", size = 511644, upload-time = "2026-02-27T21:04:38.743Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/0e/fe80144e3e4048e5d19ccdb91ac547c1a7dc3da8dbd1443e210048194c14/langgraph-1.0.7-py3-none-any.whl", hash = "sha256:9d68e8f8dd8f3de2fec45f9a06de05766d9b075b78fb03171779893b7a52c4d2", size = 157353, upload-time = "2026-01-22T16:57:45.997Z" }, + { url = "https://files.pythonhosted.org/packages/5d/60/260e0c04620a37ba8916b712766c341cc5fc685dabc6948c899494bbc2ae/langgraph-1.0.10-py3-none-any.whl", hash = "sha256:7c298bef4f6ea292fcf9824d6088fe41a6727e2904ad6066f240c4095af12247", size = 160920, upload-time = "2026-02-27T21:04:35.932Z" }, ] [[package]] @@ -1423,15 +1470,15 @@ wheels = [ [[package]] name = "langgraph-prebuilt" -version = "1.0.7" +version = "1.0.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph-checkpoint" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/59/711aecd1a50999456850dc328f3cad72b4372d8218838d8d5326f80cb76f/langgraph_prebuilt-1.0.7.tar.gz", hash = "sha256:38e097e06de810de4d0e028ffc0e432bb56d1fb417620fb1dfdc76c5e03e4bf9", size = 163692, upload-time = "2026-01-22T16:45:22.801Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/06/dd61a5c2dce009d1b03b1d56f2a85b3127659fdddf5b3be5d8f1d60820fb/langgraph_prebuilt-1.0.8.tar.gz", hash = "sha256:0cd3cf5473ced8a6cd687cc5294e08d3de57529d8dd14fdc6ae4899549efcf69", size = 164442, upload-time = "2026-02-19T18:14:39.083Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/49/5e37abb3f38a17a3487634abc2a5da87c208cc1d14577eb8d7184b25c886/langgraph_prebuilt-1.0.7-py3-none-any.whl", hash = "sha256:e14923516504405bb5edc3977085bc9622c35476b50c1808544490e13871fe7c", size = 
35324, upload-time = "2026-01-22T16:45:21.784Z" }, + { url = "https://files.pythonhosted.org/packages/dc/41/ec966424ad3f2ed3996d24079d3342c8cd6c0bd0653c12b2a917a685ec6c/langgraph_prebuilt-1.0.8-py3-none-any.whl", hash = "sha256:d16a731e591ba4470f3e313a319c7eee7dbc40895bcf15c821f985a3522a7ce0", size = 35648, upload-time = "2026-02-19T18:14:37.611Z" }, ] [[package]] @@ -1592,20 +1639,20 @@ name = "mcp" version = "1.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "anyio" }, - { name = "httpx" }, - { name = "httpx-sse" }, - { name = "jsonschema" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt", extra = ["crypto"] }, - { name = "python-multipart" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "sse-starlette" }, - { name = "starlette" }, - { name = "typing-extensions" }, - { name = "typing-inspection" }, - { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, + { name = "anyio", marker = "python_full_version >= '3.12'" }, + { name = "httpx", marker = "python_full_version >= '3.12'" }, + { name = "httpx-sse", marker = "python_full_version >= '3.12'" }, + { name = "jsonschema", marker = "python_full_version >= '3.12'" }, + { name = "pydantic", marker = "python_full_version >= '3.12'" }, + { name = "pydantic-settings", marker = "python_full_version >= '3.12'" }, + { name = "pyjwt", extra = ["crypto"], marker = "python_full_version >= '3.12'" }, + { name = "python-multipart", marker = "python_full_version >= '3.12'" }, + { name = "pywin32", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, + { name = "sse-starlette", marker = "python_full_version >= '3.12'" }, + { name = "starlette", marker = "python_full_version >= '3.12'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.12'" }, + { name = "typing-inspection", marker = "python_full_version >= '3.12'" }, + { name = "uvicorn", marker = "python_full_version >= '3.12' and 
sys_platform != 'emscripten'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" } wheels = [ @@ -1746,7 +1793,7 @@ name = "multiprocess" version = "0.70.18" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "dill" }, + { name = "dill", marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } wheels = [ @@ -1770,6 +1817,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, ] +[[package]] +name = "narwhals" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/59/81d0f4cad21484083466f278e6b392addd9f4205b48d45b5c8771670ebf8/narwhals-2.17.0.tar.gz", hash = "sha256:ebd5bc95bcfa2f8e89a8ac09e2765a63055162837208e67b42d6eeb6651d5e67", size = 620306, upload-time = "2026-02-23T09:44:34.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/27/20770bd6bf8fbe1e16f848ba21da9df061f38d2e6483952c29d2bb5d1d8b/narwhals-2.17.0-py3-none-any.whl", hash = "sha256:2ac5307b7c2b275a7d66eeda906b8605e3d7a760951e188dcfff86e8ebe083dd", size = 444897, upload-time = "2026-02-23T09:44:32.006Z" }, +] + [[package]] name = "nest-asyncio" version = "1.6.0" @@ -1797,41 +1853,10 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] -[[package]] -name = "numpy" -version = "1.26.4" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.12'", -] -sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129, upload-time = "2024-02-06T00:26:44.495Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554, upload-time = "2024-02-05T23:51:50.149Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127, upload-time = "2024-02-05T23:52:15.314Z" }, - { url = "https://files.pythonhosted.org/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994, upload-time = "2024-02-05T23:52:47.569Z" }, - { url = "https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005, 
upload-time = "2024-02-05T23:53:15.637Z" }, - { url = "https://files.pythonhosted.org/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297, upload-time = "2024-02-05T23:53:42.16Z" }, - { url = "https://files.pythonhosted.org/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567, upload-time = "2024-02-05T23:54:11.696Z" }, - { url = "https://files.pythonhosted.org/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812, upload-time = "2024-02-05T23:54:26.453Z" }, - { url = "https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913, upload-time = "2024-02-05T23:54:53.933Z" }, - { url = "https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901, upload-time = "2024-02-05T23:55:32.801Z" }, - { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868, upload-time = "2024-02-05T23:55:56.28Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109, upload-time = "2024-02-05T23:56:20.368Z" }, - { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613, upload-time = "2024-02-05T23:56:56.054Z" }, - { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172, upload-time = "2024-02-05T23:57:21.56Z" }, - { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643, upload-time = "2024-02-05T23:57:56.585Z" }, - { url = "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803, upload-time = "2024-02-05T23:58:08.963Z" }, - { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754, upload-time = "2024-02-05T23:58:36.364Z" }, -] - [[package]] name = "numpy" version = "2.4.1" source = { registry = "https://pypi.org/simple" } 
-resolution-markers = [ - "python_full_version >= '3.13'", - "python_full_version == '3.12.*'", -] sdist = { url = "https://files.pythonhosted.org/packages/24/62/ae72ff66c0f1fd959925b4c11f8c2dea61f47f6acaea75a08512cdfe3fed/numpy-2.4.1.tar.gz", hash = "sha256:a1ceafc5042451a858231588a104093474c6a5c57dcc724841f5c888d237d690", size = 20721320, upload-time = "2026-01-10T06:44:59.619Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a5/34/2b1bc18424f3ad9af577f6ce23600319968a70575bd7db31ce66731bbef9/numpy-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0cce2a669e3c8ba02ee563c7835f92c153cf02edff1ae05e1823f1dde21b16a5", size = 16944563, upload-time = "2026-01-10T06:42:14.615Z" }, @@ -1890,67 +1915,129 @@ wheels = [ name = "nvidia-nat" version = "1.5.0a20260112" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", +] +dependencies = [ + { name = "aioboto3", marker = "python_full_version >= '3.12'" }, + { name = "authlib", marker = "python_full_version >= '3.12'" }, + { name = "click", marker = "python_full_version >= '3.12'" }, + { name = "colorama", marker = "python_full_version >= '3.12'" }, + { name = "datasets", marker = "python_full_version >= '3.12'" }, + { name = "expandvars", marker = "python_full_version >= '3.12'" }, + { name = "fastapi", marker = "python_full_version >= '3.12'" }, + { name = "httpx", marker = "python_full_version >= '3.12'" }, + { name = "jinja2", marker = "python_full_version >= '3.12'" }, + { name = "jsonpath-ng", marker = "python_full_version >= '3.12'" }, + { name = "mcp", marker = "python_full_version >= '3.12'" }, + { name = "nest-asyncio2", marker = "python_full_version >= '3.12'" }, + { name = "networkx", marker = "python_full_version >= '3.12'" }, + { name = "numpy", marker = "python_full_version >= '3.12'" }, + { name = "openinference-semantic-conventions", marker = "python_full_version >= '3.12'" }, + { name = 
"openpyxl", marker = "python_full_version >= '3.12'" }, + { name = "optuna", marker = "python_full_version >= '3.12'" }, + { name = "pip", marker = "python_full_version >= '3.12'" }, + { name = "pkce", marker = "python_full_version >= '3.12'" }, + { name = "pkginfo", marker = "python_full_version >= '3.12'" }, + { name = "platformdirs", marker = "python_full_version >= '3.12'" }, + { name = "pydantic", marker = "python_full_version >= '3.12'" }, + { name = "pymilvus", marker = "python_full_version >= '3.12'" }, + { name = "python-dotenv", marker = "python_full_version >= '3.12'" }, + { name = "pyyaml", marker = "python_full_version >= '3.12'" }, + { name = "ragas", marker = "python_full_version >= '3.12'" }, + { name = "rich", marker = "python_full_version >= '3.12'" }, + { name = "tabulate", marker = "python_full_version >= '3.12'" }, + { name = "uvicorn", extra = ["standard"], marker = "python_full_version >= '3.12'" }, + { name = "wikipedia", marker = "python_full_version >= '3.12'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/e0/c7426ed15d1eb528eb0c9135efb66da033b0a56b63f42d4099b2fe05fd24/nvidia_nat-1.5.0a20260112-py3-none-any.whl", hash = "sha256:3d05c948efe0e3ab58e3d7a58ab90510d1a1128eb678810e1ef62efc5dfc9681", size = 950027, upload-time = "2026-01-12T10:46:15.705Z" }, +] + +[[package]] +name = "nvidia-nat" +version = "1.5.0a20260223" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.12'", +] +dependencies = [ + { name = "nvidia-nat-core", marker = "python_full_version < '3.12'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/7e/6e984de1473e8264d5cf6598d14f1c01f6dabf22f2fedda5f8e97140ae05/nvidia_nat-1.5.0a20260223-py3-none-any.whl", hash = "sha256:137461b310af90ed12e0496bac90ddb62297b00287707c80df48208437e2502a", size = 52704, upload-time = "2026-02-23T10:04:57.955Z" }, +] + +[[package]] +name = "nvidia-nat-core" +version = "1.5.0a20260223" +source = { 
registry = "https://pypi.org/simple" } dependencies = [ { name = "aioboto3" }, { name = "authlib" }, { name = "click" }, { name = "colorama" }, - { name = "datasets" }, { name = "expandvars" }, { name = "fastapi" }, + { name = "flask" }, { name = "httpx" }, { name = "jinja2" }, { name = "jsonpath-ng" }, - { name = "mcp" }, { name = "nest-asyncio2" }, { name = "networkx" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "openinference-semantic-conventions" }, - { name = "openpyxl" }, { name = "optuna" }, + { name = "pandas" }, { name = "pip" }, { name = "pkce" }, { name = "pkginfo" }, { name = "platformdirs" }, + { name = "plotly" }, { name = "pydantic" }, + { name = "pyjwt" }, { name = "pymilvus" }, { name = "python-dotenv" }, + { name = "python-multipart" }, { name = "pyyaml" }, - { name = "ragas" }, { name = "rich" }, { name = "tabulate" }, + { name = "urllib3" }, { name = "uvicorn", extra = ["standard"] }, { name = "wikipedia" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/e0/c7426ed15d1eb528eb0c9135efb66da033b0a56b63f42d4099b2fe05fd24/nvidia_nat-1.5.0a20260112-py3-none-any.whl", hash = "sha256:3d05c948efe0e3ab58e3d7a58ab90510d1a1128eb678810e1ef62efc5dfc9681", size = 950027, upload-time = "2026-01-12T10:46:15.705Z" }, + { url = "https://files.pythonhosted.org/packages/b1/23/b043caadf08a72e4eb2c95bb65fed5083e7bf40af48ea92305fabf3b2820/nvidia_nat_core-1.5.0a20260223-py3-none-any.whl", hash = "sha256:5262cae48d66efbd53f98134e7820759121a1b4398b339e1d14d307ed2195a21", size = 762259, upload-time = "2026-02-23T10:01:44.692Z" }, ] [[package]] name = "nvidia-nat-langchain" -version = "1.5.0a20260112" +version = "1.5.0a20260223" source = { registry = "https://pypi.org/simple" } dependencies = [ { name 
= "langchain" }, { name = "langchain-aws" }, { name = "langchain-classic" }, + { name = "langchain-community" }, { name = "langchain-core" }, + { name = "langchain-huggingface" }, { name = "langchain-litellm" }, { name = "langchain-milvus" }, { name = "langchain-nvidia-ai-endpoints" }, { name = "langchain-openai" }, { name = "langchain-tavily" }, { name = "langgraph" }, - { name = "nvidia-nat" }, + { name = "nvidia-nat-core" }, + { name = "openevals" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/84/2a/7a2cd2e7444ef03bdebb6c9637e63a9eee33da84e7c23baceb18f83f2250/nvidia_nat_langchain-1.5.0a20260112-py3-none-any.whl", hash = "sha256:cba64b0192d589f325cbbc2de60da8eb514efc27b49157b3eafca204ab989a55", size = 60925, upload-time = "2026-01-12T10:43:35.414Z" }, + { url = "https://files.pythonhosted.org/packages/98/65/e565dc570ecfdf4c4ca34d0d873794d33fdf11a8889a9b2b1a78ad15b589/nvidia_nat_langchain-1.5.0a20260223-py3-none-any.whl", hash = "sha256:87c70294c1f38fcd09252a79dd5e9038aee73326678dd9d7519b8064b914d7e4", size = 160480, upload-time = "2026-02-23T10:06:08.62Z" }, ] [[package]] name = "nvidia-rag" -version = "2.4.0.dev0" +version = "2.5.0.dev0" source = { editable = "../../" } dependencies = [ { name = "anyio" }, @@ -2016,16 +2103,16 @@ requires-dist = [ { name = "langchain-elasticsearch", marker = "extra == 'all'", specifier = ">=0.3" }, { name = "langchain-elasticsearch", marker = "extra == 'elasticsearch'", specifier = ">=0.3" }, { name = "langchain-milvus", specifier = ">=0.3.0" }, - { name = "langchain-nvidia-ai-endpoints", specifier = ">=1.0.3" }, + { name = "langchain-nvidia-ai-endpoints", specifier = ">=1.2.0" }, { name = "langchain-openai", marker = "extra == 'all'", specifier = ">=0.2" }, { name = "langchain-openai", marker = "extra == 'ingest'", specifier = ">=0.2" }, { name = "langchain-openai", marker = "extra == 'rag'", specifier = ">=0.2" }, { name = "lark", specifier = ">=1.2.2" }, { name = "minio", specifier = ">=7.2,<8.0" }, - { 
name = "nv-ingest-api", marker = "extra == 'all'", specifier = "==26.1.1" }, - { name = "nv-ingest-api", marker = "extra == 'ingest'", specifier = "==26.1.1" }, - { name = "nv-ingest-client", marker = "extra == 'all'", specifier = "==26.1.1" }, - { name = "nv-ingest-client", marker = "extra == 'ingest'", specifier = "==26.1.1" }, + { name = "nv-ingest-api", marker = "extra == 'all'", specifier = "==26.1.2" }, + { name = "nv-ingest-api", marker = "extra == 'ingest'", specifier = "==26.1.2" }, + { name = "nv-ingest-client", marker = "extra == 'all'", specifier = "==26.1.2" }, + { name = "nv-ingest-client", marker = "extra == 'ingest'", specifier = "==26.1.2" }, { name = "opentelemetry-api", marker = "extra == 'all'", specifier = ">=1.29,<2.0" }, { name = "opentelemetry-api", marker = "extra == 'ingest'", specifier = ">=1.29,<2.0" }, { name = "opentelemetry-api", marker = "extra == 'rag'", specifier = ">=1.29,<2.0" }, @@ -2094,8 +2181,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coloredlogs" }, { name = "flatbuffers" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "packaging" }, { name = "protobuf" }, { name = "sympy" }, @@ -2129,6 +2215,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/df/c306f7375d42bafb379934c2df4c2fa3964656c8c782bac75ee10c102818/openai-2.15.0-py3-none-any.whl", hash = "sha256:6ae23b932cd7230f7244e52954daa6602716d6b9bf235401a107af731baea6c3", size = 1067879, upload-time = "2026-01-09T22:10:06.446Z" }, ] +[[package]] +name = "openevals" +version = "0.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langchain" }, + { name = "langchain-openai" }, + { name = "langsmith" }, + { name = "rich" }, +] +sdist = { url 
= "https://files.pythonhosted.org/packages/d4/37/31e23ef661fa4c3c6a3c979afd884b30205512b4dde680b36d5909550500/openevals-0.1.3.tar.gz", hash = "sha256:9b00df1a7738464676aa887d4d950b77d3ef7024f6e8a54be3a83c82f485ea65", size = 100828, upload-time = "2025-12-18T04:09:03.034Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/68/162b0d273ffef5b0ad557ebccb790725bf94d78969702324dd5726828cf0/openevals-0.1.3-py3-none-any.whl", hash = "sha256:aed448df0cfdded732e24cda026eda065435a71ffb8c406a3ce73e590156d9f9", size = 67802, upload-time = "2025-12-18T04:09:01.59Z" }, +] + [[package]] name = "openinference-semantic-conventions" version = "0.1.25" @@ -2143,7 +2244,7 @@ name = "openpyxl" version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "et-xmlfile" }, + { name = "et-xmlfile", marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" } wheels = [ @@ -2392,8 +2493,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "alembic" }, { name = "colorlog" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pyyaml" }, { name = "sqlalchemy" }, @@ -2506,8 +2606,7 @@ name = "pandas" version = "2.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "python-dateutil" }, { name = "pytz" }, { name = "tzdata" }, @@ -2668,6 +2767,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/28/3bfe2fa5a7b9c46fe7e13c97bda14c895fb10fa2ebf1d0abb90e0cea7ee1/platformdirs-4.5.1-py3-none-any.whl", hash = "sha256:d03afa3963c806a9bed9d5125c8f4cb2fdaf74a55ab60e5d59b3fde758104d31", size = 18731, upload-time = "2025-12-05T13:52:56.823Z" }, ] +[[package]] +name = "plotly" +version = "6.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "narwhals" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/fb/41efe84970cfddefd4ccf025e2cbfafe780004555f583e93dba3dac2cdef/plotly-6.6.0.tar.gz", hash = "sha256:b897f15f3b02028d69f755f236be890ba950d0a42d7dfc619b44e2d8cea8748c", size = 7027956, upload-time = "2026-03-02T21:10:25.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/d2/c6e44dba74f17c6216ce1b56044a9b93a929f1c2d5bdaff892512b260f5e/plotly-6.6.0-py3-none-any.whl", hash = "sha256:8d6daf0f87412e0c0bfe72e809d615217ab57cc715899a1e5145135a7800d1d0", size = 9910315, upload-time = "2026-03-02T21:10:18.131Z" }, +] + [[package]] name = "ply" version = "3.11" @@ -2963,7 +3075,7 @@ wheels = [ [package.optional-dependencies] crypto = [ - { name = "cryptography" }, + { name = "cryptography", marker = "python_full_version >= '3.12'" }, ] [[package]] @@ -2994,8 +3106,7 @@ name = "pymilvus-model" version = "0.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "onnxruntime" }, { name = "protobuf" }, { name = "scipy" }, @@ -3142,7 
+3253,8 @@ source = { editable = "." } dependencies = [ { name = "langchain-classic" }, { name = "langgraph" }, - { name = "nvidia-nat" }, + { name = "nvidia-nat", version = "1.5.0a20260112", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "nvidia-nat", version = "1.5.0a20260223", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, { name = "nvidia-nat-langchain" }, { name = "nvidia-rag", extra = ["rag"] }, { name = "transformers" }, @@ -3151,7 +3263,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "langchain-classic" }, - { name = "langgraph", specifier = ">=1.0.7" }, + { name = "langgraph", specifier = ">=1.0.8" }, { name = "nvidia-nat", specifier = ">=1.5.0a0,<2.0" }, { name = "nvidia-nat-langchain", specifier = ">=1.5.0a0,<2.0" }, { name = "nvidia-rag", extras = ["rag"], editable = "../../" }, @@ -3163,19 +3275,18 @@ name = "ragas" version = "0.2.15" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "appdirs" }, - { name = "datasets" }, - { name = "diskcache" }, - { name = "langchain" }, - { name = "langchain-community" }, - { name = "langchain-core" }, - { name = "langchain-openai" }, - { name = "nest-asyncio" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "openai" }, - { name = "pydantic" }, - { name = "tiktoken" }, + { name = "appdirs", marker = "python_full_version >= '3.12'" }, + { name = "datasets", marker = "python_full_version >= '3.12'" }, + { name = "diskcache", marker = "python_full_version >= '3.12'" }, + { name = "langchain", marker = "python_full_version >= '3.12'" }, + { name = "langchain-community", marker = "python_full_version >= '3.12'" }, + { name = "langchain-core", 
marker = "python_full_version >= '3.12'" }, + { name = "langchain-openai", marker = "python_full_version >= '3.12'" }, + { name = "nest-asyncio", marker = "python_full_version >= '3.12'" }, + { name = "numpy", marker = "python_full_version >= '3.12'" }, + { name = "openai", marker = "python_full_version >= '3.12'" }, + { name = "pydantic", marker = "python_full_version >= '3.12'" }, + { name = "tiktoken", marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6c/0f/04fddfa94744b1c3d8901aed8832a6b4193cc8e4886881f1bb88ff055350/ragas-0.2.15.tar.gz", hash = "sha256:2d0cd77b315a9c9c02ceb0a19ca8a48e82e1d02416587a2944ea51e6e327cd7b", size = 40867766, upload-time = "2025-04-24T16:39:28.734Z" } wheels = [ @@ -3438,8 +3549,7 @@ name = "scipy" version = "1.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } wheels = [ @@ -3569,7 +3679,7 @@ name = "sse-starlette" version = "3.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "anyio" }, + { name = "anyio", marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/db/3c/fa6517610dc641262b77cc7bf994ecd17465812c1b0585fe33e11be758ab/sse_starlette-3.0.3.tar.gz", hash = "sha256:88cfb08747e16200ea990c8ca876b03910a23b547ab3bd764c0d8eb81019b971", size = 21943, upload-time = "2025-10-30T18:44:20.117Z" } wheels = [ @@ -3703,8 
+3813,7 @@ version = "5.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, - { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "numpy" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -3976,6 +4085,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598, upload-time = "2026-01-10T09:23:45.395Z" }, ] +[[package]] +name = "werkzeug" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/f1/ee81806690a87dab5f5653c1f146c92bc066d7f4cebc603ef88eb9e13957/werkzeug-3.1.6.tar.gz", hash = "sha256:210c6bede5a420a913956b4791a7f4d6843a43b6fcee4dfa08a65e93007d0d25", size = 864736, upload-time = "2026-02-19T15:17:18.884Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/ec/d58832f89ede95652fd01f4f24236af7d32b70cab2196dfcc2d2fd13c5c2/werkzeug-3.1.6-py3-none-any.whl", hash = "sha256:7ddf3357bb9564e407607f988f683d72038551200c704012bb9a4c523d42f131", size = 225166, upload-time = "2026-02-19T15:17:17.475Z" }, +] + [[package]] name = "wikipedia" version = "1.4.0" diff --git a/frontend/Dockerfile b/frontend/Dockerfile index 1c0a6961d..c6f3e3b01 100644 --- a/frontend/Dockerfile +++ b/frontend/Dockerfile @@ -94,7 +94,7 @@ RUN if [ "$DOWNLOAD_LEGAL_COMPLIANCE" = "true" ] && [ -d /legal ]; then \ # Production stage - NVIDIA distroless (pre-approved) # Updated to latest version to address CVE-2025-9230 (libssl3) -FROM 
nvcr.io/nvidia/distroless/node:24-v3.1.3 +FROM nvcr.io/nvidia/distroless/node:24-v4.0.2 # Copy built application and config for production preview WORKDIR /app/frontend diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 2bbc357ea..681e85512 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -8,22 +8,25 @@ "name": "frontend", "version": "0.0.0", "dependencies": { - "@kui/react": "./src/assets/kui-react-0.400.5.tgz", + "@kui/react": "./src/assets/kui-foundations-react-external-0.504.1.tgz", "@tanstack/react-query": "^5.80.7", + "dompurify": "^3.3.3", + "lucide-react": "^0.559.0", "marked": "^15.0.12", "react": "^19.1.0", "react-dom": "^19.1.0", - "react-router-dom": "^7.6.2", + "react-router-dom": "^7.12.0", "uuid": "^11.1.0", "zustand": "^5.0.5" }, "devDependencies": { "@eslint/js": "^9.25.0", - "@kui/foundations": "file:src/assets/kui-foundations-0.401.2.tgz", + "@kui/foundations": "file:src/assets/kui-foundations-react-external-0.504.1.tgz", "@tailwindcss/vite": "^4.1.11", "@testing-library/jest-dom": "^6.6.4", "@testing-library/react": "^16.3.0", "@testing-library/user-event": "^14.6.1", + "@types/dompurify": "^3.0.5", "@types/node": "^24.5.1", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", @@ -477,418 +480,1445 @@ "version": "1.4.1", "license": "MIT" }, - "node_modules/@esbuild/linux-x64": { + "node_modules/@esbuild/aix-ppc64": { "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.9.tgz", + "integrity": "sha512-OaGtL73Jck6pBKjNIe24BnFE6agGl+6KxDtTfHhy1HmhthfKouEcOhqpSL64K4/0WCtbKFLOdzD/44cJ4k9opA==", "cpu": [ - "x64" + "ppc64" ], "dev": true, "license": "MIT", "optional": true, "os": [ - "linux" + "aix" ], "engines": { "node": ">=18" } }, - "node_modules/@eslint-community/eslint-utils": { - "version": "4.7.0", + "node_modules/@esbuild/android-arm": { + "version": "0.25.9", + "resolved": 
"https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.9.tgz", + "integrity": "sha512-5WNI1DaMtxQ7t7B6xa572XMXpHAaI/9Hnhk8lcxF4zVN4xstUgTlvuGDorBguKEnZO70qwEcLpfifMLoxiPqHQ==", + "cpu": [ + "arm" + ], "dev": true, "license": "MIT", - "dependencies": { - "eslint-visitor-keys": "^3.4.3" - }, + "optional": true, + "os": [ + "android" + ], "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" - }, - "peerDependencies": { - "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + "node": ">=18" } }, - "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { - "version": "3.4.3", + "node_modules/@esbuild/android-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.9.tgz", + "integrity": "sha512-IDrddSmpSv51ftWslJMvl3Q2ZT98fUSL2/rlUXuVqRXHCs5EUF1/f+jbjF5+NG9UffUDMCiTyh8iec7u8RlTLg==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "android" + ], "engines": { - "node": "^12.22.0 || ^14.17.0 || >=16.0.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" + "node": ">=18" } }, - "node_modules/@eslint-community/regexpp": { - "version": "4.12.1", + "node_modules/@esbuild/android-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.9.tgz", + "integrity": "sha512-I853iMZ1hWZdNllhVZKm34f4wErd4lMyeV7BLzEExGEIZYsOzqDWDf+y082izYUE8gtJnYHdeDpN/6tUdwvfiw==", + "cpu": [ + "x64" + ], "dev": true, "license": "MIT", + "optional": true, + "os": [ + "android" + ], "engines": { - "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + "node": ">=18" } }, - "node_modules/@eslint/config-array": { - "version": "0.21.0", + "node_modules/@esbuild/darwin-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.9.tgz", + "integrity": 
"sha512-XIpIDMAjOELi/9PB30vEbVMs3GV1v2zkkPnuyRRURbhqjyzIINwj+nbQATh4H9GxUgH1kFsEyQMxwiLFKUS6Rg==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@eslint/object-schema": "^2.1.6", - "debug": "^4.3.1", - "minimatch": "^3.1.2" - }, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + "node": ">=18" } }, - "node_modules/@eslint/config-helpers": { - "version": "0.3.1", + "node_modules/@esbuild/darwin-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.9.tgz", + "integrity": "sha512-jhHfBzjYTA1IQu8VyrjCX4ApJDnH+ez+IYVEoJHeqJm9VhG9Dh2BYaJritkYK3vMaXrf7Ogr/0MQ8/MeIefsPQ==", + "cpu": [ + "x64" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + "node": ">=18" } }, - "node_modules/@eslint/core": { - "version": "0.15.2", + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.9.tgz", + "integrity": "sha512-z93DmbnY6fX9+KdD4Ue/H6sYs+bhFQJNCPZsi4XWJoYblUqT06MQUdBCpcSfuiN72AbqeBFu5LVQTjfXDE2A6Q==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@types/json-schema": "^7.0.15" - }, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + "node": ">=18" } }, - "node_modules/@eslint/eslintrc": { - "version": "3.3.1", + "node_modules/@esbuild/freebsd-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.9.tgz", + "integrity": "sha512-mrKX6H/vOyo5v71YfXWJxLVxgy1kyt1MQaD8wZJgJfG4gq4DpQGpgTB74e5yBeQdyMTbgxp0YtNj7NuHN0PoZg==", + "cpu": [ + "x64" + ], "dev": true, "license": "MIT", - "dependencies": { - "ajv": "^6.12.4", - "debug": "^4.3.2", 
- "espree": "^10.0.1", - "globals": "^14.0.0", - "ignore": "^5.2.0", - "import-fresh": "^3.2.1", - "js-yaml": "^4.1.0", - "minimatch": "^3.1.2", - "strip-json-comments": "^3.1.1" - }, + "optional": true, + "os": [ + "freebsd" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://opencollective.com/eslint" + "node": ">=18" } }, - "node_modules/@eslint/eslintrc/node_modules/globals": { - "version": "14.0.0", + "node_modules/@esbuild/linux-arm": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.9.tgz", + "integrity": "sha512-HBU2Xv78SMgaydBmdor38lg8YDnFKSARg1Q6AT0/y2ezUAKiZvc211RDFHlEZRFNRVhcMamiToo7bDx3VEOYQw==", + "cpu": [ + "arm" + ], "dev": true, "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { "node": ">=18" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@eslint/js": { - "version": "9.34.0", + "node_modules/@esbuild/linux-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.9.tgz", + "integrity": "sha512-BlB7bIcLT3G26urh5Dmse7fiLmLXnRlopw4s8DalgZ8ef79Jj4aUcYbk90g8iCa2467HX8SAIidbL7gsqXHdRw==", + "cpu": [ + "arm64" + ], "dev": true, "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" - }, - "funding": { - "url": "https://eslint.org/donate" + "node": ">=18" } }, - "node_modules/@eslint/object-schema": { - "version": "2.1.6", + "node_modules/@esbuild/linux-ia32": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.9.tgz", + "integrity": "sha512-e7S3MOJPZGp2QW6AK6+Ly81rC7oOSerQ+P8L0ta4FhVi+/j/v2yZzx5CqqDaWjtPFfYz21Vi1S0auHrap3Ma3A==", + "cpu": [ + "ia32" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" 
+ "node": ">=18" } }, - "node_modules/@eslint/plugin-kit": { - "version": "0.3.5", + "node_modules/@esbuild/linux-loong64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.9.tgz", + "integrity": "sha512-Sbe10Bnn0oUAB2AalYztvGcK+o6YFFA/9829PhOCUS9vkJElXGdphz0A3DbMdP8gmKkqPmPcMJmJOrI3VYB1JQ==", + "cpu": [ + "loong64" + ], "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@eslint/core": "^0.15.2", - "levn": "^0.4.1" - }, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + "node": ">=18" } }, - "node_modules/@floating-ui/core": { - "version": "1.7.3", + "node_modules/@esbuild/linux-mips64el": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.9.tgz", + "integrity": "sha512-YcM5br0mVyZw2jcQeLIkhWtKPeVfAerES5PvOzaDxVtIyZ2NUBZKNLjC5z3/fUlDgT6w89VsxP2qzNipOaaDyA==", + "cpu": [ + "mips64el" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@floating-ui/utils": "^0.2.10" + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" } }, - "node_modules/@floating-ui/dom": { - "version": "1.7.4", + "node_modules/@esbuild/linux-ppc64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.9.tgz", + "integrity": "sha512-++0HQvasdo20JytyDpFvQtNrEsAgNG2CY1CLMwGXfFTKGBGQT3bOeLSYE2l1fYdvML5KUuwn9Z8L1EWe2tzs1w==", + "cpu": [ + "ppc64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@floating-ui/core": "^1.7.3", - "@floating-ui/utils": "^0.2.10" + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" } }, - "node_modules/@floating-ui/react-dom": { - "version": "2.1.6", + "node_modules/@esbuild/linux-riscv64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.9.tgz", + "integrity": 
"sha512-uNIBa279Y3fkjV+2cUjx36xkx7eSjb8IvnL01eXUKXez/CBHNRw5ekCGMPM0BcmqBxBcdgUWuUXmVWwm4CH9kg==", + "cpu": [ + "riscv64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@floating-ui/dom": "^1.7.4" - }, - "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" } }, - "node_modules/@floating-ui/utils": { - "version": "0.2.10", - "license": "MIT" - }, - "node_modules/@humanfs/core": { - "version": "0.19.1", + "node_modules/@esbuild/linux-s390x": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.9.tgz", + "integrity": "sha512-Mfiphvp3MjC/lctb+7D287Xw1DGzqJPb/J2aHHcHxflUo+8tmN/6d4k6I2yFR7BVo5/g7x2Monq4+Yew0EHRIA==", + "cpu": [ + "s390x" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": ">=18.18.0" + "node": ">=18" } }, - "node_modules/@humanfs/node": { - "version": "0.16.6", + "node_modules/@esbuild/linux-x64": { + "version": "0.25.9", + "cpu": [ + "x64" + ], "dev": true, - "license": "Apache-2.0", - "dependencies": { - "@humanfs/core": "^0.19.1", - "@humanwhocodes/retry": "^0.3.0" - }, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": ">=18.18.0" + "node": ">=18" } }, - "node_modules/@humanfs/node/node_modules/@humanwhocodes/retry": { - "version": "0.3.1", + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.9.tgz", + "integrity": "sha512-9jNJl6FqaUG+COdQMjSCGW4QiMHH88xWbvZ+kRVblZsWrkXlABuGdFJ1E9L7HK+T0Yqd4akKNa/lO0+jDxQD4Q==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], "engines": { - "node": ">=18.18" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" + "node": ">=18" } }, 
- "node_modules/@humanwhocodes/module-importer": { - "version": "1.0.1", + "node_modules/@esbuild/netbsd-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.9.tgz", + "integrity": "sha512-RLLdkflmqRG8KanPGOU7Rpg829ZHu8nFy5Pqdi9U01VYtG9Y0zOG6Vr2z4/S+/3zIyOxiK6cCeYNWOFR9QP87g==", + "cpu": [ + "x64" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], "engines": { - "node": ">=12.22" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" + "node": ">=18" } }, - "node_modules/@humanwhocodes/retry": { - "version": "0.4.3", + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.9.tgz", + "integrity": "sha512-YaFBlPGeDasft5IIM+CQAhJAqS3St3nJzDEgsgFixcfZeyGPCd6eJBWzke5piZuZ7CtL656eOSYKk4Ls2C0FRQ==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "Apache-2.0", + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], "engines": { - "node": ">=18.18" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/nzakas" + "node": ">=18" } }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", + "node_modules/@esbuild/openbsd-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.9.tgz", + "integrity": "sha512-1MkgTCuvMGWuqVtAvkpkXFmtL8XhWy+j4jaSO2wxfJtilVCi0ZE37b8uOdMItIHz4I6z1bWWtEX4CJwcKYLcuA==", + "cpu": [ + "x64" + ], "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], "engines": { - "node": ">=12" + "node": ">=18" } }, - "node_modules/@isaacs/fs-minipass": { 
- "version": "4.0.1", + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.9.tgz", + "integrity": "sha512-4Xd0xNiMVXKh6Fa7HEJQbrpP3m3DDn43jKxMjxLLRjWnRsfxjORYJlXPO4JNcXtOyfajXorRKY9NkOpTHptErg==", + "cpu": [ + "arm64" + ], "dev": true, - "license": "ISC", - "dependencies": { - "minipass": "^7.0.4" - }, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], "engines": { - "node": ">=18.0.0" + "node": ">=18" } }, - "node_modules/@istanbuljs/schema": { - "version": "0.1.3", + "node_modules/@esbuild/sunos-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.9.tgz", + "integrity": "sha512-WjH4s6hzo00nNezhp3wFIAfmGZ8U7KtrJNlFMRKxiI9mxEK1scOMAaa9i4crUtu+tBr+0IN6JCuAcSBJZfnphw==", + "cpu": [ + "x64" + ], "dev": true, "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], "engines": { - "node": ">=8" + "node": ">=18" } }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", + "node_modules/@esbuild/win32-arm64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.9.tgz", + "integrity": "sha512-mGFrVJHmZiRqmP8xFOc6b84/7xa5y5YvR1x8djzXpJBSv/UsNK6aqec+6JDjConTgvvQefdGhFDAs2DLAds6gQ==", + "cpu": [ + "arm64" + ], "dev": true, "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" } }, - "node_modules/@jridgewell/remapping": { - "version": "2.3.5", + "node_modules/@esbuild/win32-ia32": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.9.tgz", + "integrity": "sha512-b33gLVU2k11nVx1OhX3C8QQP6UHQK4ZtN56oFWvVXvz2VkDoe6fbG8TOgHFxEvqeqohmRnIHe5A1+HADk4OQww==", + "cpu": [ + "ia32" + ], "dev": true, "license": "MIT", - "dependencies": { 
- "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" } }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", + "node_modules/@esbuild/win32-x64": { + "version": "0.25.9", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.9.tgz", + "integrity": "sha512-PPOl1mi6lpLNQxnGoyAfschAodRFYXJ+9fs6WHXz7CSWKbOqiMZsubC+BQsVKuul+3vKLuwTHsS2c2y9EoKwxQ==", + "cpu": [ + "x64" + ], "dev": true, "license": "MIT", + "optional": true, + "os": [ + "win32" + ], "engines": { - "node": ">=6.0.0" + "node": ">=18" } }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", + "node_modules/@eslint-community/eslint-utils": { + "version": "4.7.0", "dev": true, - "license": "MIT" + "license": "MIT", + "dependencies": { + "eslint-visitor-keys": "^3.4.3" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.30", + "node_modules/@eslint-community/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.12.1", "dev": true, "license": "MIT", + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/config-array": { + "version": "0.21.0", + "dev": true, + "license": "Apache-2.0", "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" + "@eslint/object-schema": "^2.1.6", + "debug": "^4.3.1", + "minimatch": "^3.1.2" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - 
"node_modules/@kui/foundations": { - "version": "0.401.2", - "resolved": "file:src/assets/kui-foundations-0.401.2.tgz", - "integrity": "sha512-jWmDuxPhQz0XzaCv/pb+xSKkHbPEnFKtzgcc3Yzx9DB4GJmmTvS7pgRSt0KL9ISN7zyn+K/rRlbXObANpPWQeA==" + "node_modules/@eslint/config-helpers": { + "version": "0.3.1", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } }, - "node_modules/@kui/react": { - "version": "0.400.5", - "resolved": "file:src/assets/kui-react-0.400.5.tgz", - "integrity": "sha512-cHmxFrE7uu3I2rg3woW7BJVLALciJwleRZDmh21Nnf1t5Q/TWAQxq+FRURrTSIUE++KDBPcdX0Lyza7bIuuGZg==", + "node_modules/@eslint/core": { + "version": "0.15.2", + "dev": true, + "license": "Apache-2.0", "dependencies": { - "@ariakit/react": "^0.4.17", - "@date-fns/tz": "^1.2.0", - "@kui/foundations": "^0.401.2", - "@radix-ui/react-accordion": "^1.2.3", - "@radix-ui/react-avatar": "^1.1.3", - "@radix-ui/react-checkbox": "^1.1.4", - "@radix-ui/react-dialog": "^1.1.6", - "@radix-ui/react-dropdown-menu": "^2.1.6", - "@radix-ui/react-label": "^2.1.2", - "@radix-ui/react-popover": "^1.1.6", - "@radix-ui/react-primitive": "^2.0.2", - "@radix-ui/react-progress": "^1.1.2", - "@radix-ui/react-radio-group": "^1.2.3", - "@radix-ui/react-slider": "^1.2.3", - "@radix-ui/react-slot": "^1.1.2", - "@radix-ui/react-switch": "^1.1.3", - "@radix-ui/react-tabs": "^1.1.3", - "@radix-ui/react-tooltip": "^1.1.8", - "class-variance-authority": "^0.7.0", - "date-fns": "^4.1.0", - "fast-equals": "^5.2.2", - "react-day-picker": "^9.7.0" + "@types/json-schema": "^7.0.15" }, - "peerDependencies": { - "react": ">=18", - "react-dom": ">=18" + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", + "node_modules/@eslint/eslintrc": { + "version": "3.3.1", "dev": true, "license": "MIT", "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": 
"^10.0.1", + "globals": "^14.0.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" }, "engines": { - "node": ">= 8" + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" } }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", + "node_modules/@eslint/eslintrc/node_modules/globals": { + "version": "14.0.0", "dev": true, "license": "MIT", "engines": { - "node": ">= 8" + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", + "node_modules/@eslint/js": { + "version": "9.34.0", "dev": true, "license": "MIT", - "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" - }, "engines": { - "node": ">= 8" + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "url": "https://eslint.org/donate" } }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", + "node_modules/@eslint/object-schema": { + "version": "2.1.6", "dev": true, - "license": "MIT", - "optional": true, + "license": "Apache-2.0", "engines": { - "node": ">=14" + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" } }, - "node_modules/@polka/url": { - "version": "1.0.0-next.29", + "node_modules/@eslint/plugin-kit": { + "version": "0.3.5", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@eslint/core": "^0.15.2", + "levn": "^0.4.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@floating-ui/core": { + "version": "1.7.5", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.7.5.tgz", + "integrity": "sha512-1Ih4WTWyw0+lKyFMcBHGbb5U5FtuHJuujoyyr5zTaWS5EYMeT6Jb2AuDeftsCsEuchO+mM2ij5+q9crhydzLhQ==", + "license": "MIT", + "dependencies": { + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/dom": { + "version": "1.7.6", + "resolved": 
"https://registry.npmjs.org/@floating-ui/dom/-/dom-1.7.6.tgz", + "integrity": "sha512-9gZSAI5XM36880PPMm//9dfiEngYoC6Am2izES1FF406YFsjvyBMmeJ2g4SAju3xWwtuynNRFL2s9hgxpLI5SQ==", + "license": "MIT", + "dependencies": { + "@floating-ui/core": "^1.7.5", + "@floating-ui/utils": "^0.2.11" + } + }, + "node_modules/@floating-ui/react-dom": { + "version": "2.1.8", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.8.tgz", + "integrity": "sha512-cC52bHwM/n/CxS87FH0yWdngEZrjdtLW/qVruo68qg+prK7ZQ4YGdut2GyDVpoGeAYe/h899rVeOVm6Oi40k2A==", + "license": "MIT", + "dependencies": { + "@floating-ui/dom": "^1.7.6" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@floating-ui/utils": { + "version": "0.2.11", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.11.tgz", + "integrity": "sha512-RiB/yIh78pcIxl6lLMG0CgBXAZ2Y0eVHqMPYugu+9U0AeT6YBeiJpf7lbdJNIugFP5SIjwNRgo4DhR1Qxi26Gg==", + "license": "MIT" + }, + "node_modules/@humanfs/core": { + "version": "0.19.1", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node": { + "version": "0.16.6", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "@humanfs/core": "^0.19.1", + "@humanwhocodes/retry": "^0.3.0" + }, + "engines": { + "node": ">=18.18.0" + } + }, + "node_modules/@humanfs/node/node_modules/@humanwhocodes/retry": { + "version": "0.3.1", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "dev": true, + "license": "Apache-2.0", + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/retry": { + "version": "0.4.3", + "dev": true, + "license": "Apache-2.0", + 
"engines": { + "node": ">=18.18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "dev": true, + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/fs-minipass": { + "version": "4.0.1", + "dev": true, + "license": "ISC", + "dependencies": { + "minipass": "^7.0.4" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/@istanbuljs/schema": { + "version": "0.1.3", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.13", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/remapping": { + "version": "2.3.5", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.5", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.30", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@kui/foundations": { + "name": "@kui/foundations-react-external", + "version": "0.504.1", + "resolved": "file:src/assets/kui-foundations-react-external-0.504.1.tgz", + "integrity": 
"sha512-OzwSRZyrzs4YMg42L0T9uW2um8qORImbZlDRHfOiButQRbW4S+aSbSNA8yxxeNxd3GKoB1kn/YrfyTSsha8M+Q==", + "dev": true, + "dependencies": { + "@ariakit/react": "^0.4.17", + "@date-fns/tz": "^1.2.0", + "@radix-ui/react-primitive": "^2.1.3", + "class-variance-authority": "^0.7.0", + "date-fns": "^4.1.0", + "fast-equals": "^5.2.2", + "radix-ui": "^1.4.2", + "react-day-picker": "^9.7.0" + }, + "peerDependencies": { + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + } + }, + "node_modules/@kui/react": { + "name": "@kui/foundations-react-external", + "version": "0.504.1", + "resolved": "file:src/assets/kui-foundations-react-external-0.504.1.tgz", + "integrity": "sha512-OzwSRZyrzs4YMg42L0T9uW2um8qORImbZlDRHfOiButQRbW4S+aSbSNA8yxxeNxd3GKoB1kn/YrfyTSsha8M+Q==", + "dependencies": { + "@ariakit/react": "^0.4.17", + "@date-fns/tz": "^1.2.0", + "@radix-ui/react-primitive": "^2.1.3", + "class-variance-authority": "^0.7.0", + "date-fns": "^4.1.0", + "fast-equals": "^5.2.2", + "radix-ui": "^1.4.2", + "react-day-picker": "^9.7.0" + }, + "peerDependencies": { + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "dev": true, + "license": "MIT", + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "dev": true, + "license": "MIT", + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "dev": true, + "license": "MIT", + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "dev": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@polka/url": { + "version": "1.0.0-next.29", "dev": true, "license": "MIT" }, 
"node_modules/@radix-ui/number": { "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz", + "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==", + "license": "MIT" + }, + "node_modules/@radix-ui/primitive": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", + "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==", "license": "MIT" }, - "node_modules/@radix-ui/primitive": { - "version": "1.1.3", - "license": "MIT" + "node_modules/@radix-ui/react-accessible-icon": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-accessible-icon/-/react-accessible-icon-1.1.7.tgz", + "integrity": "sha512-XM+E4WXl0OqUJFovy6GjmxxFyx9opfCAIUku4dlKRd5YEPqt4kALOkQOp0Of6reHuUkJuiPBEc5k0o4z4lTC8A==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-visually-hidden": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-accordion": { + "version": "1.2.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-accordion/-/react-accordion-1.2.12.tgz", + "integrity": "sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collapsible": "1.1.12", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + 
"@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-alert-dialog": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-alert-dialog/-/react-alert-dialog-1.1.15.tgz", + "integrity": "sha512-oTVLkEw5GpdRe29BqJ0LSDFWI3qu0vR1M0mUkOQWDIUnY/QIkLpgDMWuKxP94c2NAC2LGcgVhG1ImF3jkZ5wXw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dialog": "1.1.15", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-arrow": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.7.tgz", + "integrity": "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + 
"node_modules/@radix-ui/react-aspect-ratio": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-aspect-ratio/-/react-aspect-ratio-1.1.7.tgz", + "integrity": "sha512-Yq6lvO9HQyPwev1onK1daHCHqXVLzPhSVjmsNjCa2Zcxy2f7uJD2itDtxknv6FzAKCwD1qQkeVDmX/cev13n/g==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-avatar": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-avatar/-/react-avatar-1.1.10.tgz", + "integrity": "sha512-V8piFfWapM5OmNCXTzVQY+E1rDa53zY+MQ4Y7356v4fFz6vqCyUtIz2rUD44ZEdwg78/jKmMJHj07+C/Z/rcog==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-is-hydrated": "0.1.0", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-checkbox": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-checkbox/-/react-checkbox-1.3.3.tgz", + "integrity": "sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": 
"1.1.2", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collapsible": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collapsible/-/react-collapsible-1.1.12.tgz", + "integrity": "sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collection": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.7.tgz", + "integrity": "sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + 
"@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-compose-refs": { + "version": "1.1.2", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.2.tgz", + "integrity": "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context-menu": { + "version": "2.2.16", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context-menu/-/react-context-menu-2.2.16.tgz", + "integrity": "sha512-O8morBEW+HsVG28gYDZPTrT9UUovQUlJue5YO836tiTJhuIWBm/zQHc7j388sHWtdH/xUZurK9olD2+pcqx5ww==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-menu": "2.1.16", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + 
"@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.1.15.tgz", + "integrity": "sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-direction": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.1.tgz", + "integrity": "sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz", + "integrity": 
"sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-escape-keydown": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dropdown-menu": { + "version": "2.1.16", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dropdown-menu/-/react-dropdown-menu-2.1.16.tgz", + "integrity": "sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-menu": "2.1.16", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-focus-guards": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz", + "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + 
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz", + "integrity": "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-form": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-form/-/react-form-0.1.8.tgz", + "integrity": "sha512-QM70k4Zwjttifr5a4sZFts9fn8FzHYvQ5PiB19O2HsYibaHSVt9fH9rzB0XZo/YcM+b7t/p7lYCT/F5eOeF5yQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-label": "2.1.7", + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-hover-card": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-hover-card/-/react-hover-card-1.1.15.tgz", + "integrity": 
"sha512-qgTkjNT1CfKMoP0rcasmlH2r1DAiYicWsDsufxl940sT2wHNEWWv6FMWIQXWhVdmC1d/HYfbhQx60KYyAtKxjg==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-id": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.1.tgz", + "integrity": "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-use-layout-effect": "1.1.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-label": { + "version": "2.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-label/-/react-label-2.1.7.tgz", + "integrity": "sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + 
"optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-menu": { + "version": "2.1.16", + "resolved": "https://registry.npmjs.org/@radix-ui/react-menu/-/react-menu-2.1.16.tgz", + "integrity": "sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } }, - "node_modules/@radix-ui/react-accordion": { - "version": "1.2.12", + "node_modules/@radix-ui/react-menubar": { + "version": "1.1.16", + "resolved": "https://registry.npmjs.org/@radix-ui/react-menubar/-/react-menubar-1.1.16.tgz", + "integrity": "sha512-EB1FktTz5xRRi2Er974AUQZWg2yVBb1yjip38/lgwtCVRd3a+maUoGHN/xs9Yv8SY8QwbSEb+YrxGadVWbEutA==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collapsible": "1.1.12", "@radix-ui/react-collection": "1.1.7", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", 
"@radix-ui/react-direction": "1.1.1", "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-menu": "2.1.16", "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-roving-focus": "1.1.11", "@radix-ui/react-use-controllable-state": "1.2.2" }, "peerDependencies": { @@ -906,11 +1936,26 @@ } } }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.1.7", + "node_modules/@radix-ui/react-navigation-menu": { + "version": "1.2.14", + "resolved": "https://registry.npmjs.org/@radix-ui/react-navigation-menu/-/react-navigation-menu-1.2.14.tgz", + "integrity": "sha512-YB9mTFQvCOAQMHU+C/jVl96WmuWeltyUEpRJJky51huhds5W2FQr1J8D/16sQlf0ozxkPK8uF3niQMdUwZPv5w==", "license": "MIT", "dependencies": { - "@radix-ui/react-primitive": "2.1.3" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-visually-hidden": "1.2.3" }, "peerDependencies": { "@types/react": "*", @@ -927,13 +1972,22 @@ } } }, - "node_modules/@radix-ui/react-avatar": { - "version": "1.1.10", + "node_modules/@radix-ui/react-one-time-password-field": { + "version": "0.1.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-one-time-password-field/-/react-one-time-password-field-0.1.8.tgz", + "integrity": "sha512-ycS4rbwURavDPVjCb5iS3aG4lURFDILi6sKI/WITUMZ13gMmn/xGjpLoqBAalhJaDk8I3UbCM5GzKHrnzwHbvg==", "license": "MIT", "dependencies": { + "@radix-ui/number": "1.1.1", + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": 
"1.1.2", + "@radix-ui/react-direction": "1.1.1", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-effect-event": "0.0.2", "@radix-ui/react-use-is-hydrated": "0.1.0", "@radix-ui/react-use-layout-effect": "1.1.1" }, @@ -952,18 +2006,20 @@ } } }, - "node_modules/@radix-ui/react-checkbox": { - "version": "1.3.3", + "node_modules/@radix-ui/react-password-toggle-field": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-password-toggle-field/-/react-password-toggle-field-0.1.3.tgz", + "integrity": "sha512-/UuCrDBWravcaMix4TdT+qlNdVwOM1Nck9kWx/vafXsdfj1ChfhOdfi3cy9SGBpWgTXwYCuboT/oYpJy3clqfw==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-id": "1.1.1", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" + "@radix-ui/react-use-effect-event": "0.0.2", + "@radix-ui/react-use-is-hydrated": "0.1.0" }, "peerDependencies": { "@types/react": "*", @@ -980,18 +2036,27 @@ } } }, - "node_modules/@radix-ui/react-collapsible": { - "version": "1.1.12", + "node_modules/@radix-ui/react-popover": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", + "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", "@radix-ui/react-id": "1.1.1", + 
"@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1" + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" }, "peerDependencies": { "@types/react": "*", @@ -1008,14 +2073,22 @@ } } }, - "node_modules/@radix-ui/react-collection": { - "version": "1.1.7", + "node_modules/@radix-ui/react-popper": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", + "integrity": "sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==", "license": "MIT", "dependencies": { + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.7", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3" + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-rect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/rect": "1.1.1" }, "peerDependencies": { "@types/react": "*", @@ -1032,50 +2105,115 @@ } } }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.1.2", + "node_modules/@radix-ui/react-portal": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz", + "integrity": "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==", "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 
|| ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true + }, + "@types/react-dom": { + "optional": true } } }, - "node_modules/@radix-ui/react-context": { - "version": "1.1.2", + "node_modules/@radix-ui/react-presence": { + "version": "1.1.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.5.tgz", + "integrity": "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==", "license": "MIT", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-use-layout-effect": "1.1.1" + }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true + }, + "@types/react-dom": { + "optional": true } } }, - "node_modules/@radix-ui/react-dialog": { - "version": "1.1.15", + "node_modules/@radix-ui/react-primitive": { + "version": "2.1.3", + "license": "MIT", + "dependencies": { + "@radix-ui/react-slot": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-progress": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-progress/-/react-progress-1.1.7.tgz", + "integrity": "sha512-vPdg/tF6YC/ynuBIJlk1mm7Le0VgW6ub6J2UWnTQ7/D23KXcPI1qy+0vBkgKgd38RCMJavBXpB83HPNFMTb0Fg==", + "license": "MIT", + "dependencies": { + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3" + }, + "peerDependencies": { + "@types/react": "*", + 
"@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-radio-group": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.3.8.tgz", + "integrity": "sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-direction": "1.1.1", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-roving-focus": "1.1.11", "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" }, "peerDependencies": { "@types/react": "*", @@ -1092,28 +2230,52 @@ } } }, - "node_modules/@radix-ui/react-direction": { - "version": "1.1.1", + "node_modules/@radix-ui/react-roving-focus": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", + "integrity": "sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==", "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-id": 
"1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2" + }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true + }, + "@types/react-dom": { + "optional": true } } }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.1.11", + "node_modules/@radix-ui/react-scroll-area": { + "version": "1.2.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-scroll-area/-/react-scroll-area-1.2.10.tgz", + "integrity": "sha512-tAXIa1g3sM5CGpVT0uIbUx/U3Gs5N8T52IICuCtObaos1S8fzsrPXG5WObkQN3S6NVl6wKgPhAIiBGbWnvc97A==", "license": "MIT", "dependencies": { + "@radix-ui/number": "1.1.1", "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-escape-keydown": "1.1.1" + "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", @@ -1130,17 +2292,33 @@ } } }, - "node_modules/@radix-ui/react-dropdown-menu": { - "version": "2.1.16", + "node_modules/@radix-ui/react-select": { + "version": "2.2.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz", + "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==", "license": "MIT", "dependencies": { + "@radix-ui/number": "1.1.1", "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": 
"1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-menu": "2.1.16", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-visually-hidden": "1.2.3", + "aria-hidden": "^1.2.4", + "react-remove-scroll": "^2.6.3" }, "peerDependencies": { "@types/react": "*", @@ -1157,26 +2335,46 @@ } } }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.1.3", + "node_modules/@radix-ui/react-separator": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.7.tgz", + "integrity": "sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==", "license": "MIT", + "dependencies": { + "@radix-ui/react-primitive": "2.1.3" + }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true + }, + "@types/react-dom": { + "optional": true } } }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.1.7", + "node_modules/@radix-ui/react-slider": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slider/-/react-slider-1.3.6.tgz", + "integrity": "sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==", "license": "MIT", "dependencies": { + "@radix-ui/number": "1.1.1", 
+ "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1" + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" }, "peerDependencies": { "@types/react": "*", @@ -1193,11 +2391,11 @@ } } }, - "node_modules/@radix-ui/react-id": { - "version": "1.1.1", + "node_modules/@radix-ui/react-slot": { + "version": "1.2.3", "license": "MIT", "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" + "@radix-ui/react-compose-refs": "1.1.2" }, "peerDependencies": { "@types/react": "*", @@ -1209,11 +2407,19 @@ } } }, - "node_modules/@radix-ui/react-label": { - "version": "2.1.7", + "node_modules/@radix-ui/react-switch": { + "version": "1.2.6", + "resolved": "https://registry.npmjs.org/@radix-ui/react-switch/-/react-switch-1.2.6.tgz", + "integrity": "sha512-bByzr1+ep1zk4VubeEVViV592vu2lHE2BZY5OnzehZqOOgogN80+mNtCqPkhn2gklJqOpxWgPoYTSnhBCqpOXQ==", "license": "MIT", "dependencies": { - "@radix-ui/react-primitive": "2.1.3" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-previous": "1.1.1", + "@radix-ui/react-use-size": "1.1.1" }, "peerDependencies": { "@types/react": "*", @@ -1230,28 +2436,20 @@ } } }, - "node_modules/@radix-ui/react-menu": { - "version": "2.1.16", + "node_modules/@radix-ui/react-tabs": { + "version": "1.1.13", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz", + "integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==", "license": "MIT", 
"dependencies": { "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" + "@radix-ui/react-use-controllable-state": "1.2.2" }, "peerDependencies": { "@types/react": "*", @@ -1268,25 +2466,24 @@ } } }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", + "node_modules/@radix-ui/react-toast": { + "version": "1.2.15", + "resolved": "https://registry.npmjs.org/@radix-ui/react-toast/-/react-toast-1.2.15.tgz", + "integrity": "sha512-3OSz3TacUWy4WtOXV38DggwxoqJK4+eDkNMl5Z/MJZaoUPaP4/9lf81xXMe1I2ReTAptverZUpbPY4wWwWyL5g==", "license": "MIT", "dependencies": { "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-collection": "1.1.7", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-callback-ref": "1.1.1", "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-visually-hidden": "1.2.3" }, "peerDependencies": { "@types/react": 
"*", @@ -1303,20 +2500,15 @@ } } }, - "node_modules/@radix-ui/react-popper": { - "version": "1.2.8", + "node_modules/@radix-ui/react-toggle": { + "version": "1.1.10", + "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle/-/react-toggle-1.1.10.tgz", + "integrity": "sha512-lS1odchhFTeZv3xwHH31YPObmJn8gOg7Lq12inrr0+BH/l3Tsq32VfjqH1oh80ARM3mlkfMic15n0kg4sD1poQ==", "license": "MIT", "dependencies": { - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", + "@radix-ui/primitive": "1.1.3", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-rect": "1.1.1", - "@radix-ui/react-use-size": "1.1.1", - "@radix-ui/rect": "1.1.1" + "@radix-ui/react-use-controllable-state": "1.2.2" }, "peerDependencies": { "@types/react": "*", @@ -1333,12 +2525,19 @@ } } }, - "node_modules/@radix-ui/react-portal": { - "version": "1.1.9", + "node_modules/@radix-ui/react-toggle-group": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle-group/-/react-toggle-group-1.1.11.tgz", + "integrity": "sha512-5umnS0T8JQzQT6HbPyO7Hh9dgd82NmS36DQr+X/YJ9ctFNCiiQd6IJAYYZ33LUwm8M+taCz5t2ui29fHZc4Y6Q==", "license": "MIT", "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-layout-effect": "1.1.1" + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-toggle": "1.1.10", + "@radix-ui/react-use-controllable-state": "1.2.2" }, "peerDependencies": { "@types/react": "*", @@ -1355,12 +2554,19 @@ } } }, - "node_modules/@radix-ui/react-presence": { - "version": "1.1.5", + "node_modules/@radix-ui/react-toolbar": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/@radix-ui/react-toolbar/-/react-toolbar-1.1.11.tgz", + 
"integrity": "sha512-4ol06/1bLoFu1nwUqzdD4Y5RZ9oDdKeiHIsntug54Hcr1pgaHiPqHFEaXI1IFP/EsOfROQZ8Mig9VTIRza6Tjg==", "license": "MIT", "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-use-layout-effect": "1.1.1" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-separator": "1.1.7", + "@radix-ui/react-toggle-group": "1.1.11" }, "peerDependencies": { "@types/react": "*", @@ -1377,11 +2583,24 @@ } } }, - "node_modules/@radix-ui/react-primitive": { - "version": "2.1.3", + "node_modules/@radix-ui/react-tooltip": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", + "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", "license": "MIT", "dependencies": { - "@radix-ui/react-slot": "1.2.3" + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-id": "1.1.1", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-visually-hidden": "1.2.3" }, "peerDependencies": { "@types/react": "*", @@ -1398,124 +2617,114 @@ } } }, - "node_modules/@radix-ui/react-progress": { - "version": "1.1.7", + "node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", + "integrity": "sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==", + "license": "MIT", + "peerDependencies": { + "@types/react": 
"*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-controllable-state": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.2.2.tgz", + "integrity": "sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==", "license": "MIT", "dependencies": { - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3" + "@radix-ui/react-use-effect-event": "0.0.2", + "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-radio-group": { - "version": "1.3.8", + "node_modules/@radix-ui/react-use-effect-event": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-effect-event/-/react-use-effect-event-0.0.2.tgz", + "integrity": "sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==", "license": "MIT", "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" + "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - 
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-roving-focus": { - "version": "1.1.11", + "node_modules/@radix-ui/react-use-escape-keydown": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.1.tgz", + "integrity": "sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==", "license": "MIT", "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2" + "@radix-ui/react-use-callback-ref": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-slider": { - "version": "1.3.6", + "node_modules/@radix-ui/react-use-is-hydrated": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-is-hydrated/-/react-use-is-hydrated-0.1.0.tgz", + "integrity": "sha512-U+UORVEq+cTnRIaostJv9AGdV3G6Y+zbVd+12e18jQ5A3c0xL03IhnHuiU4UV69wolOQp5GfR58NW/EgdQhwOA==", "license": "MIT", "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - 
"@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" + "use-sync-external-store": "^1.5.0" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-slot": { - "version": "1.2.3", + "node_modules/@radix-ui/react-use-layout-effect": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.1.tgz", + "integrity": "sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==", "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-previous": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-previous/-/react-use-previous-1.1.1.tgz", + "integrity": "sha512-2dHfToCj/pzca2Ck724OZ5L0EVrr3eHRNsG/b3xQJLA2hZpVCS99bLAX+hm1IHXDEnzU6by5z/5MIY794/a8NQ==", + "license": "MIT", "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" @@ -1526,77 +2735,49 @@ } } }, - "node_modules/@radix-ui/react-switch": { - "version": "1.2.6", + "node_modules/@radix-ui/react-use-rect": { + "version": 
"1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.1.tgz", + "integrity": "sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==", "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" + "dependencies": { + "@radix-ui/rect": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-tabs": { - "version": "1.1.13", + "node_modules/@radix-ui/react-use-size": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.1.tgz", + "integrity": "sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==", "license": "MIT", "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2" + "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": 
{ "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", + "node_modules/@radix-ui/react-visually-hidden": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", + "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", "license": "MIT", "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" + "@radix-ui/react-primitive": "2.1.3" }, "peerDependencies": { "@types/react": "*", @@ -1613,174 +2794,474 @@ } } }, - "node_modules/@radix-ui/react-use-callback-ref": { + "node_modules/@radix-ui/rect": { "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", + "integrity": "sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==", + "license": "MIT" + }, + "node_modules/@rolldown/pluginutils": { + "version": "1.0.0-beta.27", + "dev": true, + "license": "MIT" + }, + "node_modules/@rollup/rollup-android-arm-eabi": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.49.0.tgz", + "integrity": "sha512-rlKIeL854Ed0e09QGYFlmDNbka6I3EQFw7iZuugQjMb11KMpJCLPFL4ZPbMfaEhLADEL1yx0oujGkBQ7+qW3eA==", + "cpu": [ + "arm" + ], + "dev": true, "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - 
"peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-android-arm64": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.49.0.tgz", + "integrity": "sha512-cqPpZdKUSQYRtLLr6R4X3sD4jCBO1zUmeo3qrWBCqYIeH8Q3KRL4F3V7XJ2Rm8/RJOQBZuqzQGWPjjvFUcYa/w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ] + }, + "node_modules/@rollup/rollup-darwin-arm64": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.49.0.tgz", + "integrity": "sha512-99kMMSMQT7got6iYX3yyIiJfFndpojBmkHfTc1rIje8VbjhmqBXE+nb7ZZP3A5skLyujvT0eIUCUsxAe6NjWbw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-darwin-x64": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.49.0.tgz", + "integrity": "sha512-y8cXoD3wdWUDpjOLMKLx6l+NFz3NlkWKcBCBfttUn+VGSfgsQ5o/yDUGtzE9HvsodkP0+16N0P4Ty1VuhtRUGg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@rollup/rollup-freebsd-arm64": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.49.0.tgz", + "integrity": "sha512-3mY5Pr7qv4GS4ZvWoSP8zha8YoiqrU+e0ViPvB549jvliBbdNLrg2ywPGkgLC3cmvN8ya3za+Q2xVyT6z+vZqA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-freebsd-x64": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.49.0.tgz", + "integrity": 
"sha512-C9KzzOAQU5gU4kG8DTk+tjdKjpWhVWd5uVkinCwwFub2m7cDYLOdtXoMrExfeBmeRy9kBQMkiyJ+HULyF1yj9w==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ] + }, + "node_modules/@rollup/rollup-linux-arm-gnueabihf": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.49.0.tgz", + "integrity": "sha512-OVSQgEZDVLnTbMq5NBs6xkmz3AADByCWI4RdKSFNlDsYXdFtlxS59J+w+LippJe8KcmeSSM3ba+GlsM9+WwC1w==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm-musleabihf": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.49.0.tgz", + "integrity": "sha512-ZnfSFA7fDUHNa4P3VwAcfaBLakCbYaxCk0jUnS3dTou9P95kwoOLAMlT3WmEJDBCSrOEFFV0Y1HXiwfLYJuLlA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-gnu": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.49.0.tgz", + "integrity": "sha512-Z81u+gfrobVK2iV7GqZCBfEB1y6+I61AH466lNK+xy1jfqFLiQ9Qv716WUM5fxFrYxwC7ziVdZRU9qvGHkYIJg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-arm64-musl": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.49.0.tgz", + "integrity": "sha512-zoAwS0KCXSnTp9NH/h9aamBAIve0DXeYpll85shf9NJ0URjSTzzS+Z9evmolN+ICfD3v8skKUPyk2PO0uGdFqg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-loongarch64-gnu": { + "version": "4.49.0", + "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-loongarch64-gnu/-/rollup-linux-loongarch64-gnu-4.49.0.tgz", + "integrity": "sha512-2QyUyQQ1ZtwZGiq0nvODL+vLJBtciItC3/5cYN8ncDQcv5avrt2MbKt1XU/vFAJlLta5KujqyHdYtdag4YEjYQ==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-ppc64-gnu": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.49.0.tgz", + "integrity": "sha512-k9aEmOWt+mrMuD3skjVJSSxHckJp+SiFzFG+v8JLXbc/xi9hv2icSkR3U7uQzqy+/QbbYY7iNB9eDTwrELo14g==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-gnu": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.49.0.tgz", + "integrity": "sha512-rDKRFFIWJ/zJn6uk2IdYLc09Z7zkE5IFIOWqpuU0o6ZpHcdniAyWkwSUWE/Z25N/wNDmFHHMzin84qW7Wzkjsw==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-riscv64-musl": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.49.0.tgz", + "integrity": "sha512-FkkhIY/hYFVnOzz1WeV3S9Bd1h0hda/gRqvZCMpHWDHdiIHn6pqsY3b5eSbvGccWHMQ1uUzgZTKS4oGpykf8Tw==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-s390x-gnu": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.49.0.tgz", + "integrity": "sha512-gRf5c+A7QiOG3UwLyOOtyJMD31JJhMjBvpfhAitPAoqZFcOeK3Kc1Veg1z/trmt+2P6F/biT02fU19GGTS529A==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + 
"node_modules/@rollup/rollup-linux-x64-gnu": { + "version": "4.49.0", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-linux-x64-musl": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.49.0.tgz", + "integrity": "sha512-hDMOAe+6nX3V5ei1I7Au3wcr9h3ktKzDvF2ne5ovX8RZiAHEtX1A5SNNk4zt1Qt77CmnbqT+upb/umzoPMWiPg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@rollup/rollup-win32-arm64-msvc": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.49.0.tgz", + "integrity": "sha512-wkNRzfiIGaElC9kXUT+HLx17z7D0jl+9tGYRKwd8r7cUqTL7GYAvgUY++U2hK6Ar7z5Z6IRRoWC8kQxpmM7TDA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-ia32-msvc": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.49.0.tgz", + "integrity": "sha512-gq5aW/SyNpjp71AAzroH37DtINDcX1Qw2iv9Chyz49ZgdOP3NV8QCyKZUrGsYX9Yyggj5soFiRCgsL3HwD8TdA==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] + }, + "node_modules/@rollup/rollup-win32-x64-msvc": { + "version": "4.49.0", + "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.49.0.tgz", + "integrity": "sha512-gEtqFbzmZLFk2xKh7g0Rlo8xzho8KrEFEkzvHbfUGkrgXOpZ4XagQ6n+wIZFNh1nTb8UD16J4nFSFKXYgnbdBg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ] }, - "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.2.2", + "node_modules/@tailwindcss/node": { + "version": "4.1.12", + "dev": true, "license": "MIT", "dependencies": { - 
"@radix-ui/react-use-effect-event": "0.0.2", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "@jridgewell/remapping": "^2.3.4", + "enhanced-resolve": "^5.18.3", + "jiti": "^2.5.1", + "lightningcss": "1.30.1", + "magic-string": "^0.30.17", + "source-map-js": "^1.2.1", + "tailwindcss": "4.1.12" } }, - "node_modules/@radix-ui/react-use-effect-event": { - "version": "0.0.2", + "node_modules/@tailwindcss/oxide": { + "version": "4.1.12", + "dev": true, + "hasInstallScript": true, "license": "MIT", "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" + "detect-libc": "^2.0.4", + "tar": "^7.4.3" }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + "engines": { + "node": ">= 10" }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optionalDependencies": { + "@tailwindcss/oxide-android-arm64": "4.1.12", + "@tailwindcss/oxide-darwin-arm64": "4.1.12", + "@tailwindcss/oxide-darwin-x64": "4.1.12", + "@tailwindcss/oxide-freebsd-x64": "4.1.12", + "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.12", + "@tailwindcss/oxide-linux-arm64-gnu": "4.1.12", + "@tailwindcss/oxide-linux-arm64-musl": "4.1.12", + "@tailwindcss/oxide-linux-x64-gnu": "4.1.12", + "@tailwindcss/oxide-linux-x64-musl": "4.1.12", + "@tailwindcss/oxide-wasm32-wasi": "4.1.12", + "@tailwindcss/oxide-win32-arm64-msvc": "4.1.12", + "@tailwindcss/oxide-win32-x64-msvc": "4.1.12" } }, - "node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.1.1", + "node_modules/@tailwindcss/oxide-android-arm64": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-android-arm64/-/oxide-android-arm64-4.1.12.tgz", + "integrity": 
"sha512-oNY5pq+1gc4T6QVTsZKwZaGpBb2N1H1fsc1GD4o7yinFySqIuRZ2E4NvGasWc6PhYJwGK2+5YT1f9Tp80zUQZQ==", + "cpu": [ + "arm64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@radix-ui/react-use-callback-ref": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-use-is-hydrated": { - "version": "0.1.0", + "node_modules/@tailwindcss/oxide-darwin-arm64": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-arm64/-/oxide-darwin-arm64-4.1.12.tgz", + "integrity": "sha512-cq1qmq2HEtDV9HvZlTtrj671mCdGB93bVY6J29mwCyaMYCP/JaUBXxrQQQm7Qn33AXXASPUb2HFZlWiiHWFytw==", + "cpu": [ + "arm64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "use-sync-external-store": "^1.5.0" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.1.1", + "node_modules/@tailwindcss/oxide-darwin-x64": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-darwin-x64/-/oxide-darwin-x64-4.1.12.tgz", + "integrity": "sha512-6UCsIeFUcBfpangqlXay9Ffty9XhFH1QuUFn0WV83W8lGdX8cD5/+2ONLluALJD5+yJ7k8mVtwy3zMZmzEfbLg==", + "cpu": [ + "x64" + ], + "dev": true, "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-use-previous": { - 
"version": "1.1.1", + "node_modules/@tailwindcss/oxide-freebsd-x64": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-freebsd-x64/-/oxide-freebsd-x64-4.1.12.tgz", + "integrity": "sha512-JOH/f7j6+nYXIrHobRYCtoArJdMJh5zy5lr0FV0Qu47MID/vqJAY3r/OElPzx1C/wdT1uS7cPq+xdYYelny1ww==", + "cpu": [ + "x64" + ], + "dev": true, "license": "MIT", - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-use-rect": { - "version": "1.1.1", + "node_modules/@tailwindcss/oxide-linux-arm-gnueabihf": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm-gnueabihf/-/oxide-linux-arm-gnueabihf-4.1.12.tgz", + "integrity": "sha512-v4Ghvi9AU1SYgGr3/j38PD8PEe6bRfTnNSUE3YCMIRrrNigCFtHZ2TCm8142X8fcSqHBZBceDx+JlFJEfNg5zQ==", + "cpu": [ + "arm" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@radix-ui/rect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-use-size": { - "version": "1.1.1", + "node_modules/@tailwindcss/oxide-linux-arm64-gnu": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-gnu/-/oxide-linux-arm64-gnu-4.1.12.tgz", + "integrity": "sha512-YP5s1LmetL9UsvVAKusHSyPlzSRqYyRB0f+Kl/xcYQSPLEw/BvGfxzbH+ihUciePDjiXwHh+p+qbSP3SlJw+6g==", + "cpu": [ + "arm64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || 
^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", + "node_modules/@tailwindcss/oxide-linux-arm64-musl": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-arm64-musl/-/oxide-linux-arm64-musl-4.1.12.tgz", + "integrity": "sha512-V8pAM3s8gsrXcCv6kCHSuwyb/gPsd863iT+v1PGXC4fSL/OJqsKhfK//v8P+w9ThKIoqNbEnsZqNy+WDnwQqCA==", + "cpu": [ + "arm64" + ], + "dev": true, "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" } }, - "node_modules/@radix-ui/rect": { - "version": "1.1.1", - "license": "MIT" - }, - "node_modules/@rolldown/pluginutils": { - "version": "1.0.0-beta.27", + "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "version": "4.1.12", + "cpu": [ + "x64" + ], "dev": true, - "license": "MIT" + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } }, - "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.49.0", + "node_modules/@tailwindcss/oxide-linux-x64-musl": { + "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-linux-x64-musl/-/oxide-linux-x64-musl-4.1.12.tgz", + "integrity": "sha512-ha0pHPamN+fWZY7GCzz5rKunlv9L5R8kdh+YNvP5awe3LtuXb5nRi/H27GeL2U+TdhDOptU7T6Is7mdwh5Ar3A==", "cpu": [ "x64" ], @@ -1789,51 +3270,62 @@ "optional": true, "os": [ "linux" - ] + ], + "engines": { + "node": ">= 10" + } }, - 
"node_modules/@tailwindcss/node": { + "node_modules/@tailwindcss/oxide-wasm32-wasi": { "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-wasm32-wasi/-/oxide-wasm32-wasi-4.1.12.tgz", + "integrity": "sha512-4tSyu3dW+ktzdEpuk6g49KdEangu3eCYoqPhWNsZgUhyegEda3M9rG0/j1GV/JjVVsj+lG7jWAyrTlLzd/WEBg==", + "bundleDependencies": [ + "@napi-rs/wasm-runtime", + "@emnapi/core", + "@emnapi/runtime", + "@tybys/wasm-util", + "@emnapi/wasi-threads", + "tslib" + ], + "cpu": [ + "wasm32" + ], "dev": true, "license": "MIT", - "dependencies": { - "@jridgewell/remapping": "^2.3.4", - "enhanced-resolve": "^5.18.3", - "jiti": "^2.5.1", - "lightningcss": "1.30.1", - "magic-string": "^0.30.17", - "source-map-js": "^1.2.1", - "tailwindcss": "4.1.12" + "optional": true, + "dependencies": { + "@emnapi/core": "^1.4.5", + "@emnapi/runtime": "^1.4.5", + "@emnapi/wasi-threads": "^1.0.4", + "@napi-rs/wasm-runtime": "^0.2.12", + "@tybys/wasm-util": "^0.10.0", + "tslib": "^2.8.0" + }, + "engines": { + "node": ">=14.0.0" } }, - "node_modules/@tailwindcss/oxide": { + "node_modules/@tailwindcss/oxide-win32-arm64-msvc": { "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-arm64-msvc/-/oxide-win32-arm64-msvc-4.1.12.tgz", + "integrity": "sha512-iGLyD/cVP724+FGtMWslhcFyg4xyYyM+5F4hGvKA7eifPkXHRAUDFaimu53fpNg9X8dfP75pXx/zFt/jlNF+lg==", + "cpu": [ + "arm64" + ], "dev": true, - "hasInstallScript": true, "license": "MIT", - "dependencies": { - "detect-libc": "^2.0.4", - "tar": "^7.4.3" - }, + "optional": true, + "os": [ + "win32" + ], "engines": { "node": ">= 10" - }, - "optionalDependencies": { - "@tailwindcss/oxide-android-arm64": "4.1.12", - "@tailwindcss/oxide-darwin-arm64": "4.1.12", - "@tailwindcss/oxide-darwin-x64": "4.1.12", - "@tailwindcss/oxide-freebsd-x64": "4.1.12", - "@tailwindcss/oxide-linux-arm-gnueabihf": "4.1.12", - "@tailwindcss/oxide-linux-arm64-gnu": "4.1.12", - "@tailwindcss/oxide-linux-arm64-musl": "4.1.12", - 
"@tailwindcss/oxide-linux-x64-gnu": "4.1.12", - "@tailwindcss/oxide-linux-x64-musl": "4.1.12", - "@tailwindcss/oxide-wasm32-wasi": "4.1.12", - "@tailwindcss/oxide-win32-arm64-msvc": "4.1.12", - "@tailwindcss/oxide-win32-x64-msvc": "4.1.12" } }, - "node_modules/@tailwindcss/oxide-linux-x64-gnu": { + "node_modules/@tailwindcss/oxide-win32-x64-msvc": { "version": "4.1.12", + "resolved": "https://registry.npmjs.org/@tailwindcss/oxide-win32-x64-msvc/-/oxide-win32-x64-msvc-4.1.12.tgz", + "integrity": "sha512-NKIh5rzw6CpEodv/++r0hGLlfgT/gFN+5WNdZtvh6wpU2BpGNgdjvj6H2oFc8nCM839QM1YOhjpgbAONUb4IxA==", "cpu": [ "x64" ], @@ -1841,7 +3333,7 @@ "license": "MIT", "optional": true, "os": [ - "linux" + "win32" ], "engines": { "node": ">= 10" @@ -2018,6 +3510,16 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/dompurify": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/@types/dompurify/-/dompurify-3.0.5.tgz", + "integrity": "sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/trusted-types": "*" + } + }, "node_modules/@types/estree": { "version": "1.0.8", "dev": true, @@ -2054,6 +3556,13 @@ "@types/react": "^19.0.0" } }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "devOptional": true, + "license": "MIT" + }, "node_modules/@typescript-eslint/eslint-plugin": { "version": "8.41.0", "dev": true, @@ -2540,6 +4049,8 @@ }, "node_modules/aria-hidden": { "version": "1.2.6", + "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.6.tgz", + "integrity": "sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==", "license": "MIT", "dependencies": { "tslib": "^2.0.0" @@ -2760,10 
+4271,16 @@ "license": "MIT" }, "node_modules/cookie": { - "version": "1.0.2", + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.1.1.tgz", + "integrity": "sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==", "license": "MIT", "engines": { "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" } }, "node_modules/cross-spawn": { @@ -2877,6 +4394,8 @@ }, "node_modules/detect-node-es": { "version": "1.1.0", + "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", + "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==", "license": "MIT" }, "node_modules/dom-accessibility-api": { @@ -2885,6 +4404,15 @@ "license": "MIT", "peer": true }, + "node_modules/dompurify": { + "version": "3.3.3", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.3.tgz", + "integrity": "sha512-Oj6pzI2+RqBfFG+qOaOLbFXLQ90ARpcGG6UePL82bJLtdsa6CYJD7nmiU8MW9nQNOtCHV3lZ/Bzq1X0QYbBZCA==", + "license": "(MPL-2.0 OR Apache-2.0)", + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, "node_modules/eastasianwidth": { "version": "0.2.0", "dev": true, @@ -3307,6 +4835,21 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, "node_modules/gensync": { "version": "1.0.0-beta.2", "dev": true, @@ -3317,6 +4860,8 @@ }, "node_modules/get-nonce": { "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", + "integrity": 
"sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", "license": "MIT", "engines": { "node": ">=6" @@ -3744,6 +5289,132 @@ "lightningcss-win32-x64-msvc": "1.30.1" } }, + "node_modules/lightningcss-darwin-arm64": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.30.1.tgz", + "integrity": "sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-darwin-x64": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-darwin-x64/-/lightningcss-darwin-x64-1.30.1.tgz", + "integrity": "sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-freebsd-x64": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-freebsd-x64/-/lightningcss-freebsd-x64-1.30.1.tgz", + "integrity": "sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm-gnueabihf": { + "version": "1.30.1", + "resolved": 
"https://registry.npmjs.org/lightningcss-linux-arm-gnueabihf/-/lightningcss-linux-arm-gnueabihf-1.30.1.tgz", + "integrity": "sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-gnu": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-gnu/-/lightningcss-linux-arm64-gnu-1.30.1.tgz", + "integrity": "sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-linux-arm64-musl": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-arm64-musl/-/lightningcss-linux-arm64-musl-1.30.1.tgz", + "integrity": "sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, "node_modules/lightningcss-linux-x64-gnu": { "version": "1.30.1", "cpu": [ @@ -3763,6 +5434,69 @@ "url": "https://opencollective.com/parcel" } }, + "node_modules/lightningcss-linux-x64-musl": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-linux-x64-musl/-/lightningcss-linux-x64-musl-1.30.1.tgz", + "integrity": 
"sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-arm64-msvc": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-arm64-msvc/-/lightningcss-win32-arm64-msvc-1.30.1.tgz", + "integrity": "sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/lightningcss-win32-x64-msvc": { + "version": "1.30.1", + "resolved": "https://registry.npmjs.org/lightningcss-win32-x64-msvc/-/lightningcss-win32-x64-msvc-1.30.1.tgz", + "integrity": "sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MPL-2.0", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 12.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, "node_modules/locate-path": { "version": "6.0.0", "dev": true, @@ -3795,6 +5529,15 @@ "yallist": "^3.0.2" } }, + "node_modules/lucide-react": { + "version": "0.559.0", + "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.559.0.tgz", + "integrity": "sha512-3ymrkBPXWk3U2bwUDg6TdA6hP5iGDMgPEAMLhchEgTQmA+g0Zk24tOtKtXMx35w1PizTmsBC3RhP88QYm+7mHQ==", + "license": "ISC", + "peerDependencies": { + "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/lz-string": { "version": "1.5.0", "dev": true, @@ -4209,6 +5952,83 
@@ ], "license": "MIT" }, + "node_modules/radix-ui": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/radix-ui/-/radix-ui-1.4.3.tgz", + "integrity": "sha512-aWizCQiyeAenIdUbqEpXgRA1ya65P13NKn/W8rWkcN0OPkRDxdBVLWnIEDsS2RpwCK2nobI7oMUSmexzTDyAmA==", + "license": "MIT", + "dependencies": { + "@radix-ui/primitive": "1.1.3", + "@radix-ui/react-accessible-icon": "1.1.7", + "@radix-ui/react-accordion": "1.2.12", + "@radix-ui/react-alert-dialog": "1.1.15", + "@radix-ui/react-arrow": "1.1.7", + "@radix-ui/react-aspect-ratio": "1.1.7", + "@radix-ui/react-avatar": "1.1.10", + "@radix-ui/react-checkbox": "1.3.3", + "@radix-ui/react-collapsible": "1.1.12", + "@radix-ui/react-collection": "1.1.7", + "@radix-ui/react-compose-refs": "1.1.2", + "@radix-ui/react-context": "1.1.2", + "@radix-ui/react-context-menu": "2.2.16", + "@radix-ui/react-dialog": "1.1.15", + "@radix-ui/react-direction": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.11", + "@radix-ui/react-dropdown-menu": "2.1.16", + "@radix-ui/react-focus-guards": "1.1.3", + "@radix-ui/react-focus-scope": "1.1.7", + "@radix-ui/react-form": "0.1.8", + "@radix-ui/react-hover-card": "1.1.15", + "@radix-ui/react-label": "2.1.7", + "@radix-ui/react-menu": "2.1.16", + "@radix-ui/react-menubar": "1.1.16", + "@radix-ui/react-navigation-menu": "1.2.14", + "@radix-ui/react-one-time-password-field": "0.1.8", + "@radix-ui/react-password-toggle-field": "0.1.3", + "@radix-ui/react-popover": "1.1.15", + "@radix-ui/react-popper": "1.2.8", + "@radix-ui/react-portal": "1.1.9", + "@radix-ui/react-presence": "1.1.5", + "@radix-ui/react-primitive": "2.1.3", + "@radix-ui/react-progress": "1.1.7", + "@radix-ui/react-radio-group": "1.3.8", + "@radix-ui/react-roving-focus": "1.1.11", + "@radix-ui/react-scroll-area": "1.2.10", + "@radix-ui/react-select": "2.2.6", + "@radix-ui/react-separator": "1.1.7", + "@radix-ui/react-slider": "1.3.6", + "@radix-ui/react-slot": "1.2.3", + "@radix-ui/react-switch": "1.2.6", + 
"@radix-ui/react-tabs": "1.1.13", + "@radix-ui/react-toast": "1.2.15", + "@radix-ui/react-toggle": "1.1.10", + "@radix-ui/react-toggle-group": "1.1.11", + "@radix-ui/react-toolbar": "1.1.11", + "@radix-ui/react-tooltip": "1.2.8", + "@radix-ui/react-use-callback-ref": "1.1.1", + "@radix-ui/react-use-controllable-state": "1.2.2", + "@radix-ui/react-use-effect-event": "0.0.2", + "@radix-ui/react-use-escape-keydown": "1.1.1", + "@radix-ui/react-use-is-hydrated": "0.1.0", + "@radix-ui/react-use-layout-effect": "1.1.1", + "@radix-ui/react-use-size": "1.1.1", + "@radix-ui/react-visually-hidden": "1.2.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, "node_modules/react": { "version": "19.1.1", "license": "MIT", @@ -4260,7 +6080,9 @@ } }, "node_modules/react-remove-scroll": { - "version": "2.7.1", + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.7.2.tgz", + "integrity": "sha512-Iqb9NjCCTt6Hf+vOdNIZGdTiH1QSqr27H/Ek9sv/a97gfueI/5h1s3yRi1nngzMUaOOToin5dI1dXKdXiF+u0Q==", "license": "MIT", "dependencies": { "react-remove-scroll-bar": "^2.3.7", @@ -4284,6 +6106,8 @@ }, "node_modules/react-remove-scroll-bar": { "version": "2.3.8", + "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.8.tgz", + "integrity": "sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==", "license": "MIT", "dependencies": { "react-style-singleton": "^2.2.2", @@ -4303,7 +6127,9 @@ } }, "node_modules/react-router": { - "version": "7.8.2", + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.0.tgz", + "integrity": 
"sha512-m/xR9N4LQLmAS0ZhkY2nkPA1N7gQ5TUVa5n8TgANuDTARbn1gt+zLPXEm7W0XDTbrQ2AJSJKhoa6yx1D8BcpxQ==", "license": "MIT", "dependencies": { "cookie": "^1.0.1", @@ -4323,10 +6149,12 @@ } }, "node_modules/react-router-dom": { - "version": "7.8.2", + "version": "7.14.0", + "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.0.tgz", + "integrity": "sha512-2G3ajSVSZMEtmTjIklRWlNvo8wICEpLihfD/0YMDxbWK2UyP5EGfnoIn9AIQGnF3G/FX0MRbHXdFcD+rL1ZreQ==", "license": "MIT", "dependencies": { - "react-router": "7.8.2" + "react-router": "7.14.0" }, "engines": { "node": ">=20.0.0" @@ -4338,6 +6166,8 @@ }, "node_modules/react-style-singleton": { "version": "2.2.3", + "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.3.tgz", + "integrity": "sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==", "license": "MIT", "dependencies": { "get-nonce": "^1.0.0", @@ -4479,7 +6309,9 @@ } }, "node_modules/set-cookie-parser": { - "version": "2.7.1", + "version": "2.7.2", + "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-2.7.2.tgz", + "integrity": "sha512-oeM1lpU/UvhTxw+g3cIfxXHyJRc/uidd3yK1P242gzHds0udQBYzs3y8j4gCCW+ZJ7ad0yctld8RYO+bdurlvw==", "license": "MIT" }, "node_modules/shebang-command": { @@ -4880,6 +6712,8 @@ }, "node_modules/tslib": { "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, "node_modules/type-check": { @@ -4973,6 +6807,8 @@ }, "node_modules/use-callback-ref": { "version": "1.3.3", + "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.3.tgz", + "integrity": "sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==", "license": "MIT", "dependencies": { "tslib": "^2.0.0" @@ -4992,6 +6828,8 @@ }, 
"node_modules/use-sidecar": { "version": "1.1.3", + "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.3.tgz", + "integrity": "sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==", "license": "MIT", "dependencies": { "detect-node-es": "^1.1.0", diff --git a/frontend/package.json b/frontend/package.json index fe77b7146..49adc87b6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -17,6 +17,7 @@ "dependencies": { "@kui/react": "./src/assets/kui-foundations-react-external-0.504.1.tgz", "@tanstack/react-query": "^5.80.7", + "dompurify": "^3.3.3", "lucide-react": "^0.559.0", "marked": "^15.0.12", "react": "^19.1.0", @@ -32,6 +33,7 @@ "@testing-library/jest-dom": "^6.6.4", "@testing-library/react": "^16.3.0", "@testing-library/user-event": "^14.6.1", + "@types/dompurify": "^3.0.5", "@types/node": "^24.5.1", "@types/react": "^19.1.2", "@types/react-dom": "^19.1.2", @@ -51,5 +53,12 @@ }, "resolutions": { "@kui/foundations": "./src/assets/kui-foundations-react-external-0.504.1.tgz" + }, + "pnpm": { + "overrides": { + "rollup": ">=4.59.0", + "minimatch@3.1.2": "3.1.4", + "minimatch@9.0.5": "9.0.7" + } } -} \ No newline at end of file +} diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml index ea8207293..cc1a633fc 100644 --- a/frontend/pnpm-lock.yaml +++ b/frontend/pnpm-lock.yaml @@ -6,6 +6,9 @@ settings: overrides: '@kui/foundations': ./src/assets/kui-foundations-react-external-0.504.1.tgz + rollup: '>=4.59.0' + minimatch@3.1.2: 3.1.4 + minimatch@9.0.5: 9.0.7 importers: @@ -1233,113 +1236,141 @@ packages: '@rolldown/pluginutils@1.0.0-beta.27': resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==} - '@rollup/rollup-android-arm-eabi@4.53.3': - resolution: {integrity: sha512-mRSi+4cBjrRLoaal2PnqH82Wqyb+d3HsPUN/W+WslCXsZsyHa9ZeQQX/pQsZaVIWDkPcpV6jJ+3KLbTbgnwv8w==} + '@rollup/rollup-android-arm-eabi@4.59.0': + resolution: 
{integrity: sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==} cpu: [arm] os: [android] - '@rollup/rollup-android-arm64@4.53.3': - resolution: {integrity: sha512-CbDGaMpdE9sh7sCmTrTUyllhrg65t6SwhjlMJsLr+J8YjFuPmCEjbBSx4Z/e4SmDyH3aB5hGaJUP2ltV/vcs4w==} + '@rollup/rollup-android-arm64@4.59.0': + resolution: {integrity: sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==} cpu: [arm64] os: [android] - '@rollup/rollup-darwin-arm64@4.53.3': - resolution: {integrity: sha512-Nr7SlQeqIBpOV6BHHGZgYBuSdanCXuw09hon14MGOLGmXAFYjx1wNvquVPmpZnl0tLjg25dEdr4IQ6GgyToCUA==} + '@rollup/rollup-darwin-arm64@4.59.0': + resolution: {integrity: sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==} cpu: [arm64] os: [darwin] - '@rollup/rollup-darwin-x64@4.53.3': - resolution: {integrity: sha512-DZ8N4CSNfl965CmPktJ8oBnfYr3F8dTTNBQkRlffnUarJ2ohudQD17sZBa097J8xhQ26AwhHJ5mvUyQW8ddTsQ==} + '@rollup/rollup-darwin-x64@4.59.0': + resolution: {integrity: sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==} cpu: [x64] os: [darwin] - '@rollup/rollup-freebsd-arm64@4.53.3': - resolution: {integrity: sha512-yMTrCrK92aGyi7GuDNtGn2sNW+Gdb4vErx4t3Gv/Tr+1zRb8ax4z8GWVRfr3Jw8zJWvpGHNpss3vVlbF58DZ4w==} + '@rollup/rollup-freebsd-arm64@4.59.0': + resolution: {integrity: sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==} cpu: [arm64] os: [freebsd] - '@rollup/rollup-freebsd-x64@4.53.3': - resolution: {integrity: sha512-lMfF8X7QhdQzseM6XaX0vbno2m3hlyZFhwcndRMw8fbAGUGL3WFMBdK0hbUBIUYcEcMhVLr1SIamDeuLBnXS+Q==} + '@rollup/rollup-freebsd-x64@4.59.0': + resolution: {integrity: sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==} cpu: [x64] os: [freebsd] - '@rollup/rollup-linux-arm-gnueabihf@4.53.3': - resolution: {integrity: 
sha512-k9oD15soC/Ln6d2Wv/JOFPzZXIAIFLp6B+i14KhxAfnq76ajt0EhYc5YPeX6W1xJkAdItcVT+JhKl1QZh44/qw==} + '@rollup/rollup-linux-arm-gnueabihf@4.59.0': + resolution: {integrity: sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==} cpu: [arm] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-arm-musleabihf@4.53.3': - resolution: {integrity: sha512-vTNlKq+N6CK/8UktsrFuc+/7NlEYVxgaEgRXVUVK258Z5ymho29skzW1sutgYjqNnquGwVUObAaxae8rZ6YMhg==} + '@rollup/rollup-linux-arm-musleabihf@4.59.0': + resolution: {integrity: sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==} cpu: [arm] os: [linux] + libc: [musl] - '@rollup/rollup-linux-arm64-gnu@4.53.3': - resolution: {integrity: sha512-RGrFLWgMhSxRs/EWJMIFM1O5Mzuz3Xy3/mnxJp/5cVhZ2XoCAxJnmNsEyeMJtpK+wu0FJFWz+QF4mjCA7AUQ3w==} + '@rollup/rollup-linux-arm64-gnu@4.59.0': + resolution: {integrity: sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==} cpu: [arm64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-arm64-musl@4.53.3': - resolution: {integrity: sha512-kASyvfBEWYPEwe0Qv4nfu6pNkITLTb32p4yTgzFCocHnJLAHs+9LjUu9ONIhvfT/5lv4YS5muBHyuV84epBo/A==} + '@rollup/rollup-linux-arm64-musl@4.59.0': + resolution: {integrity: sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==} cpu: [arm64] os: [linux] + libc: [musl] - '@rollup/rollup-linux-loong64-gnu@4.53.3': - resolution: {integrity: sha512-JiuKcp2teLJwQ7vkJ95EwESWkNRFJD7TQgYmCnrPtlu50b4XvT5MOmurWNrCj3IFdyjBQ5p9vnrX4JM6I8OE7g==} + '@rollup/rollup-linux-loong64-gnu@4.59.0': + resolution: {integrity: sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==} cpu: [loong64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-ppc64-gnu@4.53.3': - resolution: {integrity: sha512-EoGSa8nd6d3T7zLuqdojxC20oBfNT8nexBbB/rkxgKj5T5vhpAQKKnD+h3UkoMuTyXkP5jTjK/ccNRmQrPNDuw==} + 
'@rollup/rollup-linux-loong64-musl@4.59.0': + resolution: {integrity: sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==} + cpu: [loong64] + os: [linux] + libc: [musl] + + '@rollup/rollup-linux-ppc64-gnu@4.59.0': + resolution: {integrity: sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==} cpu: [ppc64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-riscv64-gnu@4.53.3': - resolution: {integrity: sha512-4s+Wped2IHXHPnAEbIB0YWBv7SDohqxobiiPA1FIWZpX+w9o2i4LezzH/NkFUl8LRci/8udci6cLq+jJQlh+0g==} + '@rollup/rollup-linux-ppc64-musl@4.59.0': + resolution: {integrity: sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==} + cpu: [ppc64] + os: [linux] + libc: [musl] + + '@rollup/rollup-linux-riscv64-gnu@4.59.0': + resolution: {integrity: sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==} cpu: [riscv64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-riscv64-musl@4.53.3': - resolution: {integrity: sha512-68k2g7+0vs2u9CxDt5ktXTngsxOQkSEV/xBbwlqYcUrAVh6P9EgMZvFsnHy4SEiUl46Xf0IObWVbMvPrr2gw8A==} + '@rollup/rollup-linux-riscv64-musl@4.59.0': + resolution: {integrity: sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==} cpu: [riscv64] os: [linux] + libc: [musl] - '@rollup/rollup-linux-s390x-gnu@4.53.3': - resolution: {integrity: sha512-VYsFMpULAz87ZW6BVYw3I6sWesGpsP9OPcyKe8ofdg9LHxSbRMd7zrVrr5xi/3kMZtpWL/wC+UIJWJYVX5uTKg==} + '@rollup/rollup-linux-s390x-gnu@4.59.0': + resolution: {integrity: sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==} cpu: [s390x] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-x64-gnu@4.53.3': - resolution: {integrity: sha512-3EhFi1FU6YL8HTUJZ51imGJWEX//ajQPfqWLI3BQq4TlvHy4X0MOr5q3D2Zof/ka0d5FNdPwZXm3Yyib/UEd+w==} + '@rollup/rollup-linux-x64-gnu@4.59.0': + resolution: {integrity: 
sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==} cpu: [x64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-x64-musl@4.53.3': - resolution: {integrity: sha512-eoROhjcc6HbZCJr+tvVT8X4fW3/5g/WkGvvmwz/88sDtSJzO7r/blvoBDgISDiCjDRZmHpwud7h+6Q9JxFwq1Q==} + '@rollup/rollup-linux-x64-musl@4.59.0': + resolution: {integrity: sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==} cpu: [x64] os: [linux] + libc: [musl] + + '@rollup/rollup-openbsd-x64@4.59.0': + resolution: {integrity: sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==} + cpu: [x64] + os: [openbsd] - '@rollup/rollup-openharmony-arm64@4.53.3': - resolution: {integrity: sha512-OueLAWgrNSPGAdUdIjSWXw+u/02BRTcnfw9PN41D2vq/JSEPnJnVuBgw18VkN8wcd4fjUs+jFHVM4t9+kBSNLw==} + '@rollup/rollup-openharmony-arm64@4.59.0': + resolution: {integrity: sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==} cpu: [arm64] os: [openharmony] - '@rollup/rollup-win32-arm64-msvc@4.53.3': - resolution: {integrity: sha512-GOFuKpsxR/whszbF/bzydebLiXIHSgsEUp6M0JI8dWvi+fFa1TD6YQa4aSZHtpmh2/uAlj/Dy+nmby3TJ3pkTw==} + '@rollup/rollup-win32-arm64-msvc@4.59.0': + resolution: {integrity: sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==} cpu: [arm64] os: [win32] - '@rollup/rollup-win32-ia32-msvc@4.53.3': - resolution: {integrity: sha512-iah+THLcBJdpfZ1TstDFbKNznlzoxa8fmnFYK4V67HvmuNYkVdAywJSoteUszvBQ9/HqN2+9AZghbajMsFT+oA==} + '@rollup/rollup-win32-ia32-msvc@4.59.0': + resolution: {integrity: sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==} cpu: [ia32] os: [win32] - '@rollup/rollup-win32-x64-gnu@4.53.3': - resolution: {integrity: sha512-J9QDiOIZlZLdcot5NXEepDkstocktoVjkaKUtqzgzpt2yWjGlbYiKyp05rWwk4nypbYUNoFAztEgixoLaSETkg==} + '@rollup/rollup-win32-x64-gnu@4.59.0': + resolution: 
{integrity: sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==} cpu: [x64] os: [win32] - '@rollup/rollup-win32-x64-msvc@4.53.3': - resolution: {integrity: sha512-UhTd8u31dXadv0MopwGgNOBpUVROFKWVQgAg5N1ESyCz8AuBcMqm4AuTjrwgQKGDfoFuz02EuMRHQIw/frmYKQ==} + '@rollup/rollup-win32-x64-msvc@4.59.0': + resolution: {integrity: sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==} cpu: [x64] os: [win32] @@ -1381,24 +1412,28 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-arm64-musl@4.1.17': resolution: {integrity: sha512-HvZLfGr42i5anKtIeQzxdkw/wPqIbpeZqe7vd3V9vI3RQxe3xU1fLjss0TjyhxWcBaipk7NYwSrwTwK1hJARMg==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@tailwindcss/oxide-linux-x64-gnu@4.1.17': resolution: {integrity: sha512-M3XZuORCGB7VPOEDH+nzpJ21XPvK5PyjlkSFkFziNHGLc5d6g3di2McAAblmaSUNl8IOmzYwLx9NsE7bplNkwQ==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-x64-musl@4.1.17': resolution: {integrity: sha512-k7f+pf9eXLEey4pBlw+8dgfJHY4PZ5qOUFDyNf7SI6lHjQ9Zt7+NcscjpwdCEbYi6FI5c2KDTDWyf2iHcCSyyQ==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@tailwindcss/oxide-wasm32-wasi@4.1.17': resolution: {integrity: sha512-cEytGqSSoy7zK4JRWiTCx43FsKP/zGr0CsuMawhH67ONlH+T79VteQeJQRO/X7L0juEUA8ZyuYikcRBf0vsxhg==} @@ -1677,6 +1712,10 @@ packages: balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + balanced-match@4.0.4: + resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} + engines: {node: 18 || 20 || >=22} + baseline-browser-mapping@2.9.6: resolution: {integrity: sha512-v9BVVpOTLB59C9E7aSnmIF8h7qRsFpx+A2nugVMTszEOMcfjlZMsXRm4LF23I3Z9AJxc8ANpIvzbzONoX9VJlg==} hasBin: true @@ -1684,8 +1723,9 @@ packages: 
brace-expansion@1.1.12: resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==} - brace-expansion@2.0.2: - resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} + brace-expansion@5.0.4: + resolution: {integrity: sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==} + engines: {node: 18 || 20 || >=22} browserslist@4.28.1: resolution: {integrity: sha512-ZC5Bd0LgJXgwGqUknZY/vkUQ04r8NXnJZ3yYi4vDmSiZmC/pdSN0NbNRPxZpbtO4uAfDUAFffO8IZoM3Gj8IkA==} @@ -2135,24 +2175,28 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] lightningcss-linux-arm64-musl@1.30.2: resolution: {integrity: sha512-5Vh9dGeblpTxWHpOx8iauV02popZDsCYMPIgiuw97OJ5uaDsL86cnqSFs5LZkG3ghHoX5isLgWzMs+eD1YzrnA==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] lightningcss-linux-x64-gnu@1.30.2: resolution: {integrity: sha512-Cfd46gdmj1vQ+lR6VRTTadNHu6ALuw2pKR9lYq4FnhvgBc4zWY1EtZcAc6EffShbb1MFrIPfLDXD6Xprbnni4w==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] lightningcss-linux-x64-musl@1.30.2: resolution: {integrity: sha512-XJaLUUFXb6/QG2lGIW6aIk6jKdtjtcffUT0NKvIqhSBY3hh9Ch+1LCeH80dR9q9LBjG3ewbDjnumefsLsP6aiA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] lightningcss-win32-arm64-msvc@1.30.2: resolution: {integrity: sha512-FZn+vaj7zLv//D/192WFFVA0RgHawIcHqLX9xuWiQt7P0PtdFEVaxgF9rjM/IRYHQXNnk61/H/gb2Ei+kUQ4xQ==} @@ -2214,11 +2258,11 @@ packages: resolution: {integrity: sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg==} engines: {node: '>=4'} - minimatch@3.1.2: - resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + minimatch@3.1.4: + resolution: {integrity: 
sha512-twmL+S8+7yIsE9wsqgzU3E8/LumN3M3QELrBZ20OdmQ9jB2JvW5oZtBEmft84k/Gs5CG9mqtWc6Y9vW+JEzGxw==} - minimatch@9.0.5: - resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} + minimatch@9.0.7: + resolution: {integrity: sha512-MOwgjc8tfrpn5QQEvjijjmDVtMw2oL88ugTevzxQnzRLm6l3fVEF2gzU0kYeYYKD8C66+IdGX6peJ4MyUlUnPg==} engines: {node: '>=16 || 14 >=14.17'} minipass@7.1.2: @@ -2400,8 +2444,8 @@ packages: resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==} engines: {node: '>=4'} - rollup@4.53.3: - resolution: {integrity: sha512-w8GmOxZfBmKknvdXU1sdM9NHcoQejwF/4mNgj2JuEEdRaHwwF12K7e9eXn1nLZ07ad+du76mkVsyeb2rKGllsA==} + rollup@4.59.0: + resolution: {integrity: sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==} engines: {node: '>=18.0.0', npm: '>=8.0.0'} hasBin: true @@ -3031,7 +3075,7 @@ snapshots: dependencies: '@eslint/object-schema': 2.1.7 debug: 4.4.3 - minimatch: 3.1.2 + minimatch: 3.1.4 transitivePeerDependencies: - supports-color @@ -3052,7 +3096,7 @@ snapshots: ignore: 5.3.2 import-fresh: 3.3.1 js-yaml: 4.1.1 - minimatch: 3.1.2 + minimatch: 3.1.4 strip-json-comments: 3.1.1 transitivePeerDependencies: - supports-color @@ -3910,70 +3954,79 @@ snapshots: '@rolldown/pluginutils@1.0.0-beta.27': {} - '@rollup/rollup-android-arm-eabi@4.53.3': + '@rollup/rollup-android-arm-eabi@4.59.0': + optional: true + + '@rollup/rollup-android-arm64@4.59.0': + optional: true + + '@rollup/rollup-darwin-arm64@4.59.0': optional: true - '@rollup/rollup-android-arm64@4.53.3': + '@rollup/rollup-darwin-x64@4.59.0': optional: true - '@rollup/rollup-darwin-arm64@4.53.3': + '@rollup/rollup-freebsd-arm64@4.59.0': optional: true - '@rollup/rollup-darwin-x64@4.53.3': + '@rollup/rollup-freebsd-x64@4.59.0': optional: true - '@rollup/rollup-freebsd-arm64@4.53.3': + '@rollup/rollup-linux-arm-gnueabihf@4.59.0': optional: true - 
'@rollup/rollup-freebsd-x64@4.53.3': + '@rollup/rollup-linux-arm-musleabihf@4.59.0': optional: true - '@rollup/rollup-linux-arm-gnueabihf@4.53.3': + '@rollup/rollup-linux-arm64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-arm-musleabihf@4.53.3': + '@rollup/rollup-linux-arm64-musl@4.59.0': optional: true - '@rollup/rollup-linux-arm64-gnu@4.53.3': + '@rollup/rollup-linux-loong64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-arm64-musl@4.53.3': + '@rollup/rollup-linux-loong64-musl@4.59.0': optional: true - '@rollup/rollup-linux-loong64-gnu@4.53.3': + '@rollup/rollup-linux-ppc64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-ppc64-gnu@4.53.3': + '@rollup/rollup-linux-ppc64-musl@4.59.0': optional: true - '@rollup/rollup-linux-riscv64-gnu@4.53.3': + '@rollup/rollup-linux-riscv64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-riscv64-musl@4.53.3': + '@rollup/rollup-linux-riscv64-musl@4.59.0': optional: true - '@rollup/rollup-linux-s390x-gnu@4.53.3': + '@rollup/rollup-linux-s390x-gnu@4.59.0': optional: true - '@rollup/rollup-linux-x64-gnu@4.53.3': + '@rollup/rollup-linux-x64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-x64-musl@4.53.3': + '@rollup/rollup-linux-x64-musl@4.59.0': optional: true - '@rollup/rollup-openharmony-arm64@4.53.3': + '@rollup/rollup-openbsd-x64@4.59.0': optional: true - '@rollup/rollup-win32-arm64-msvc@4.53.3': + '@rollup/rollup-openharmony-arm64@4.59.0': optional: true - '@rollup/rollup-win32-ia32-msvc@4.53.3': + '@rollup/rollup-win32-arm64-msvc@4.59.0': optional: true - '@rollup/rollup-win32-x64-gnu@4.53.3': + '@rollup/rollup-win32-ia32-msvc@4.59.0': optional: true - '@rollup/rollup-win32-x64-msvc@4.53.3': + '@rollup/rollup-win32-x64-gnu@4.59.0': + optional: true + + '@rollup/rollup-win32-x64-msvc@4.59.0': optional: true '@tailwindcss/node@4.1.17': @@ -4198,7 +4251,7 @@ snapshots: '@typescript-eslint/types': 8.49.0 '@typescript-eslint/visitor-keys': 8.49.0 debug: 4.4.3 - minimatch: 9.0.5 + minimatch: 9.0.7 semver: 7.7.3 
tinyglobby: 0.2.15 ts-api-utils: 2.1.0(typescript@5.8.3) @@ -4355,6 +4408,8 @@ snapshots: balanced-match@1.0.2: {} + balanced-match@4.0.4: {} + baseline-browser-mapping@2.9.6: {} brace-expansion@1.1.12: @@ -4362,9 +4417,9 @@ snapshots: balanced-match: 1.0.2 concat-map: 0.0.1 - brace-expansion@2.0.2: + brace-expansion@5.0.4: dependencies: - balanced-match: 1.0.2 + balanced-match: 4.0.4 browserslist@4.28.1: dependencies: @@ -4557,7 +4612,7 @@ snapshots: is-glob: 4.0.3 json-stable-stringify-without-jsonify: 1.0.1 lodash.merge: 4.6.2 - minimatch: 3.1.2 + minimatch: 3.1.4 natural-compare: 1.4.0 optionator: 0.9.4 optionalDependencies: @@ -4639,7 +4694,7 @@ snapshots: dependencies: foreground-child: 3.3.1 jackspeak: 3.4.3 - minimatch: 9.0.5 + minimatch: 9.0.7 minipass: 7.1.2 package-json-from-dist: 1.0.1 path-scurry: 1.11.1 @@ -4871,13 +4926,13 @@ snapshots: min-indent@1.0.1: {} - minimatch@3.1.2: + minimatch@3.1.4: dependencies: brace-expansion: 1.1.12 - minimatch@9.0.5: + minimatch@9.0.7: dependencies: - brace-expansion: 2.0.2 + brace-expansion: 5.0.4 minipass@7.1.2: {} @@ -5082,32 +5137,35 @@ snapshots: resolve-from@4.0.0: {} - rollup@4.53.3: + rollup@4.59.0: dependencies: '@types/estree': 1.0.8 optionalDependencies: - '@rollup/rollup-android-arm-eabi': 4.53.3 - '@rollup/rollup-android-arm64': 4.53.3 - '@rollup/rollup-darwin-arm64': 4.53.3 - '@rollup/rollup-darwin-x64': 4.53.3 - '@rollup/rollup-freebsd-arm64': 4.53.3 - '@rollup/rollup-freebsd-x64': 4.53.3 - '@rollup/rollup-linux-arm-gnueabihf': 4.53.3 - '@rollup/rollup-linux-arm-musleabihf': 4.53.3 - '@rollup/rollup-linux-arm64-gnu': 4.53.3 - '@rollup/rollup-linux-arm64-musl': 4.53.3 - '@rollup/rollup-linux-loong64-gnu': 4.53.3 - '@rollup/rollup-linux-ppc64-gnu': 4.53.3 - '@rollup/rollup-linux-riscv64-gnu': 4.53.3 - '@rollup/rollup-linux-riscv64-musl': 4.53.3 - '@rollup/rollup-linux-s390x-gnu': 4.53.3 - '@rollup/rollup-linux-x64-gnu': 4.53.3 - '@rollup/rollup-linux-x64-musl': 4.53.3 - 
'@rollup/rollup-openharmony-arm64': 4.53.3 - '@rollup/rollup-win32-arm64-msvc': 4.53.3 - '@rollup/rollup-win32-ia32-msvc': 4.53.3 - '@rollup/rollup-win32-x64-gnu': 4.53.3 - '@rollup/rollup-win32-x64-msvc': 4.53.3 + '@rollup/rollup-android-arm-eabi': 4.59.0 + '@rollup/rollup-android-arm64': 4.59.0 + '@rollup/rollup-darwin-arm64': 4.59.0 + '@rollup/rollup-darwin-x64': 4.59.0 + '@rollup/rollup-freebsd-arm64': 4.59.0 + '@rollup/rollup-freebsd-x64': 4.59.0 + '@rollup/rollup-linux-arm-gnueabihf': 4.59.0 + '@rollup/rollup-linux-arm-musleabihf': 4.59.0 + '@rollup/rollup-linux-arm64-gnu': 4.59.0 + '@rollup/rollup-linux-arm64-musl': 4.59.0 + '@rollup/rollup-linux-loong64-gnu': 4.59.0 + '@rollup/rollup-linux-loong64-musl': 4.59.0 + '@rollup/rollup-linux-ppc64-gnu': 4.59.0 + '@rollup/rollup-linux-ppc64-musl': 4.59.0 + '@rollup/rollup-linux-riscv64-gnu': 4.59.0 + '@rollup/rollup-linux-riscv64-musl': 4.59.0 + '@rollup/rollup-linux-s390x-gnu': 4.59.0 + '@rollup/rollup-linux-x64-gnu': 4.59.0 + '@rollup/rollup-linux-x64-musl': 4.59.0 + '@rollup/rollup-openbsd-x64': 4.59.0 + '@rollup/rollup-openharmony-arm64': 4.59.0 + '@rollup/rollup-win32-arm64-msvc': 4.59.0 + '@rollup/rollup-win32-ia32-msvc': 4.59.0 + '@rollup/rollup-win32-x64-gnu': 4.59.0 + '@rollup/rollup-win32-x64-msvc': 4.59.0 fsevents: 2.3.3 rrweb-cssom@0.8.0: {} @@ -5192,7 +5250,7 @@ snapshots: dependencies: '@istanbuljs/schema': 0.1.3 glob: 10.5.0 - minimatch: 9.0.5 + minimatch: 9.0.7 tinybench@2.9.0: {} @@ -5308,7 +5366,7 @@ snapshots: fdir: 6.5.0(picomatch@4.0.3) picomatch: 4.0.3 postcss: 8.5.6 - rollup: 4.53.3 + rollup: 4.59.0 tinyglobby: 0.2.15 optionalDependencies: '@types/node': 24.10.3 diff --git a/frontend/src/hooks/__tests__/useMarkdownRenderer.test.ts b/frontend/src/hooks/__tests__/useMarkdownRenderer.test.ts new file mode 100644 index 000000000..d0030af95 --- /dev/null +++ b/frontend/src/hooks/__tests__/useMarkdownRenderer.test.ts @@ -0,0 +1,76 @@ +import { describe, it, expect } from 'vitest'; +import { 
renderHook } from '../../test/utils'; +import { useMarkdownRenderer } from '../useMarkdownRenderer'; + +describe('useMarkdownRenderer', () => { + describe('XSS Prevention', () => { + it('strips script tags from rendered output', () => { + const { result } = renderHook(() => useMarkdownRenderer()); + const output = result.current.renderMarkdown( + '' + ); + expect(output).not.toContain('