Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions nemo_retriever/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,26 +52,34 @@ dependencies = [
# HTTP clients
"httpx>=0.27.0",
"requests>=2.32.5",
"aiohttp>=3.12.0",
"urllib3>=2.7.0",
# Utilities
"pydantic>=2.8.0",
"rich>=13.7.0",
"universal-pathlib>=0.2.0",
"numpy>=1.26.0",
"python-dateutil>=2.9.0",
"debugpy>=1.8.0",
"fsspec>=2025.5.1",
"s3fs>=2025.5.1",
"fastparquet>=2024.11.0,<2026",
# Core ingest packages
# Document parsing and NIM client libs
"pypdfium2==4.30.0",
"pillow==12.2.0",
"opencv-python-headless>=4.8.0",
"scikit-learn>=1.6.0",
"scipy>=1.11.0",
"nltk>=3.9.4",
"markitdown",
"langchain-nvidia-ai-endpoints>=0.3.0",
"unstructured-client",
# Default VDB solution
"lancedb",
# gRPC client for Parakeet/Riva ASR. Required for ASRCPUActor when it
# targets the public NVCF Parakeet endpoint (the default) or any remote NIM.
"grpcio",
"nvidia-riva-client>=2.25.1",
]

Expand All @@ -94,7 +102,6 @@ service = [
"glom",
"easydict",
"addict",
"scikit-learn>=1.6.0",
"psutil>=5.9.0",
"apscheduler>=3.10",
# Audio resampling used by ParakeetClient
Expand All @@ -109,7 +116,6 @@ local = [
"transformers>=4.57.6,<5",
"tokenizers>=0.21.1",
"accelerate==1.12.0",
"opencv-python-headless>=4.8.0",
"torch==2.11.0; sys_platform == 'linux'",
"torch==2.11.0; sys_platform == 'win32'",
"torch==2.11.0; sys_platform == 'darwin'",
Expand All @@ -120,7 +126,6 @@ local = [
"einops",
"easydict",
"addict",
"scikit-learn>=1.6.0",
"timm==1.0.22",
"albumentations==2.0.8",
"nemotron-page-elements-v3>=0.dev0",
Expand All @@ -141,11 +146,10 @@ local = [
]

# ── Multimedia — audio/ASR and SVG rendering ────────────────────────────────
# soundfile enables local Parakeet ASR on audio/video content.
# cairosvg enables SVG-to-image rendering (requires libcairo system library).
# scipy is not repeated here: it is already declared in the core dependencies.
multimedia = [
    "soundfile>=0.12.0",
    "cairosvg>=2.7.0",
    "librosa>=0.10.2",
]
Expand Down Expand Up @@ -189,6 +193,12 @@ dev = [
"pytest>=8.0.2",
]

test = [
    # Core pins Pillow 12.2.0, and MoviePy 2.x only accepts Pillow below 12,
    # so this extra stays on a MoviePy release older than 2.
    "moviepy<2",
    "pytest>=8.0.2",
]

# ── Convenience: full install ─────────────────────────────────────────────────
all = [
"nemo_retriever[service,local,multimedia,nemotron-parse,tabular,benchmarks,llm]",
Expand Down
Loading