Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions nemo_retriever/harness/examples/managed-helm-nrl-26.05.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Managed NRL Helm service example for the 26.05 perf sweep.
#
# Run from the repository root:
# retriever harness run --config nemo_retriever/harness/examples/managed-helm-nrl-26.05.yaml
#
# This expects the target namespace to already contain these non-chart-managed secrets:
# - ngc-secret: docker registry pull secret for nvcr.io
# - ngc-api: opaque secret with the NGC API key entries required by the NIM operator
# Keep credentials out of this file and out of command history.

# Chart and service-image versions, defined once as anchors and consumed by
# alias further down (helm_chart_version and service.image.tag).
# RC6 is the current known-good chart for core NIM coverage. When a new RC is
# validated, bump the chart version below. If the service image RC differs from
# the chart package RC, change only x-nrl-service-image-tag.
# Both values are quoted so they stay strings.
x-nrl-chart-version: &nrl_chart_version "26.05-RC6"
x-nrl-service-image-tag: &nrl_service_image_tag "26.05-RC6"

# This runner class uses A100 MIG resources rather than nvidia.com/gpu.
# These maps are passed through helm_set as --set-json and replace each NIM's
# default full-GPU limit. Adjust or remove them for non-MIG clusters.
# 1g.10gb slice: aliased by the page_elements and table_structure overrides.
x-a100-mig-1g-resource: &a100_mig_1g_resource
limits:
nvidia.com/mig-1g.10gb: 1
# 2g.20gb slice: aliased by the ocr and vlm_embed overrides.
x-a100-mig-2g-resource: &a100_mig_2g_resource
limits:
nvidia.com/mig-2g.20gb: 1

# Run selection and service/Helm settings for this example.
active:
# dataset and preset name the bo767 and dgx_8gpu entries defined under
# datasets: and presets: in this file.
dataset: bo767
preset: dgx_8gpu
# NOTE(review): presumably manage_service makes the harness install the Helm
# release itself and keep_up: false removes it after the run - confirm against
# the harness implementation.
run_mode: service
manage_service: true
keep_up: false

# The same three values are repeated in the bo767 dataset entry.
input_type: pdf
recall_required: false
evaluation_mode: none
# Concurrency knob used by service-mode runs (see the presets comment).
service_max_concurrency: 8

# helm and kubectl are invoked through microk8s.
# NOTE(review): the *_sudo flags presumably prefix those commands with sudo -
# confirm.
helm_bin: microk8s helm
kubectl_bin: microk8s kubectl
helm_sudo: true
kubectl_sudo: true

# Chart coordinates; helm_chart_version resolves to the x-nrl-chart-version
# anchor.
helm_chart: nim-nvstaging/nemo-retriever
helm_chart_version: *nrl_chart_version
helm_release: nv-ingest
helm_namespace: nv-ingest
# NOTE(review): relative path. The header says to run from the repository
# root, so confirm whether this is resolved against this file's directory or
# the working directory.
helm_values_file: ../helm-profiles/core.yaml
# NOTE(review): timeout units are not stated here; presumably seconds - confirm.
helm_timeout: 1800
readiness_timeout: 1800
helm_service_local_port: 17670

# Per-key chart overrides applied on top of helm_values_file.
helm_set:
# RC chart defaults still point at localhost:32000/nemo-retriever-service:latest.
# Keep these service.image overrides for remote RC chart runs.
service.image.repository: nvcr.io/nvstaging/nim/nrl-service
service.image.tag: *nrl_service_image_tag
service.image.pullPolicy: Always
# Use the pre-created ngc-secret and ngc-api secrets named in the file header
# instead of having the chart create them.
ngcImagePullSecret.create: false
ngcImagePullSecret.name: ngc-secret
ngcApiSecret.create: false
ngcApiSecret.name: ngc-api
# Replace the chart's default nvidia.com/gpu NIM requests for A100 MIG nodes.
nimOperator.page_elements.resources: *a100_mig_1g_resource
nimOperator.table_structure.resources: *a100_mig_1g_resource
nimOperator.ocr.resources: *a100_mig_2g_resource
nimOperator.vlm_embed.resources: *a100_mig_2g_resource

# Preset table; the preset key in the active section selects dgx_8gpu.
presets:
# Service-mode runs use service_max_concurrency; chart sizing lives in Helm values.
# The preset is therefore intentionally an empty mapping.
dgx_8gpu: {}

# Dataset table; the dataset key in the active section selects bo767.
datasets:
bo767:
# Absolute path to the dataset.
# NOTE(review): presumably it must exist on the machine running the
# harness - confirm.
path: /datasets/nv-ingest/bo767
# These repeat the input_type / recall_required / evaluation_mode values set
# in the active section.
input_type: pdf
recall_required: false
evaluation_mode: none
30 changes: 30 additions & 0 deletions nemo_retriever/harness/helm-profiles/all-optional.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Full optional-service coverage profile.
# Compared with the core profile, this enables otel and every nimOperator entry.
topology:
mode: standalone
otel:
enabled: true

serviceConfig:
vectordb:
enabled: true

nims:
enabled: true

nimOperator:
page_elements:
enabled: true
table_structure:
enabled: true
ocr:
enabled: true
vlm_embed:
enabled: true
rerankqa:
enabled: true
nemotron_parse:
enabled: true
nemotron_3_nano_omni_30b_a3b_reasoning:
enabled: true
# NOTE(review): the audio-video profile also sets installFfmpeg and
# audioGrpcEndpoint when audio is enabled; neither is set here. Confirm the
# chart defaults cover audio for this profile.
audio:
enabled: true
35 changes: 35 additions & 0 deletions nemo_retriever/harness/helm-profiles/audio-video.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Audio/video managed service profile. The audio NIM is gRPC-only.
# Same NIM selection as the core profile, plus audio.
service:
installFfmpeg: true

topology:
mode: standalone
otel:
enabled: false

serviceConfig:
nimEndpoints:
# host:port with no URL scheme, since the audio NIM is reached over gRPC.
# Quoted so the value stays a string.
audioGrpcEndpoint: "audio:50051"
vectordb:
enabled: true

nims:
enabled: true

nimOperator:
# Core NIMs.
page_elements:
enabled: true
table_structure:
enabled: true
ocr:
enabled: true
vlm_embed:
enabled: true
# Optional NIMs not needed for this profile.
rerankqa:
enabled: false
nemotron_parse:
enabled: false
nemotron_3_nano_omni_30b_a3b_reasoning:
enabled: false
# The audio NIM is the one optional entry this profile turns on.
audio:
enabled: true
30 changes: 30 additions & 0 deletions nemo_retriever/harness/helm-profiles/core.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Core managed NRL service profile: service, VectorDB, and core NIMs.
# Referenced as helm_values_file by the managed-helm-nrl-26.05.yaml example.
topology:
mode: standalone
otel:
enabled: false

serviceConfig:
vectordb:
enabled: true

nims:
enabled: true

nimOperator:
# Core NIMs: enabled.
page_elements:
enabled: true
table_structure:
enabled: true
ocr:
enabled: true
vlm_embed:
enabled: true
# Optional NIMs: explicitly disabled.
rerankqa:
enabled: false
nemotron_parse:
enabled: false
nemotron_3_nano_omni_30b_a3b_reasoning:
enabled: false
audio:
enabled: false
36 changes: 36 additions & 0 deletions nemo_retriever/harness/helm-profiles/no-nims-external.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Service-only profile for externally managed NIM endpoints.
topology:
mode: standalone
otel:
enabled: false

serviceConfig:
nimEndpoints:
# The *.example.invalid hosts are placeholders (.invalid is a reserved TLD
# that never resolves); replace them with real endpoints before use.
pageElementsInvokeUrl: "http://page-elements-nim.example.invalid:8000/v1/infer"
tableStructureInvokeUrl: "http://table-structure-nim.example.invalid:8000/v1/infer"
ocrInvokeUrl: "http://ocr-nim.example.invalid:8000/v1/infer"
embedInvokeUrl: "http://embed-nim.example.invalid:8000/v1/embeddings"
# gRPC endpoint: host:port with no URL scheme.
audioGrpcEndpoint: "audio-nim.example.invalid:50051"
vectordb:
enabled: true

# nims and every nimOperator entry are disabled.
# NOTE(review): presumably this stops the chart deploying any NIM so the
# service relies on the endpoints above - confirm against the chart.
nims:
enabled: false

nimOperator:
page_elements:
enabled: false
table_structure:
enabled: false
ocr:
enabled: false
vlm_embed:
enabled: false
rerankqa:
enabled: false
nemotron_parse:
enabled: false
nemotron_3_nano_omni_30b_a3b_reasoning:
enabled: false
audio:
enabled: false
52 changes: 52 additions & 0 deletions nemo_retriever/harness/helm-profiles/split.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Split-topology profile for gateway/realtime/batch readiness coverage.
topology:
mode: split
otel:
enabled: true
# realtime and batch carry identical HPA metric settings: only the cpu metric
# is enabled; queueDepthRatio and processingLatencyP95 are off.
realtime:
hpa:
metrics:
cpu:
enabled: true
queueDepthRatio:
enabled: false
processingLatencyP95:
enabled: false
batch:
hpa:
metrics:
cpu:
enabled: true
queueDepthRatio:
enabled: false
processingLatencyP95:
enabled: false

# Queue-depth autoscaling backend set to cpu, matching the cpu-only HPA metrics
# above.
autoscaling:
queueDepth:
backend: cpu

serviceConfig:
vectordb:
enabled: true

nims:
enabled: true

# Same NIM selection as the core profile: core NIMs on, optional NIMs off.
nimOperator:
page_elements:
enabled: true
table_structure:
enabled: true
ocr:
enabled: true
vlm_embed:
enabled: true
rerankqa:
enabled: false
nemotron_parse:
enabled: false
nemotron_3_nano_omni_30b_a3b_reasoning:
enabled: false
audio:
enabled: false
Loading
Loading