diff --git a/.github/workflows/release-helm.yml b/.github/workflows/release-helm.yml index 79bbf7b6a..a2201d827 100644 --- a/.github/workflows/release-helm.yml +++ b/.github/workflows/release-helm.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: inputs: version: - description: 'Chart version (e.g. 26.05-RC1)' + description: 'Chart version (e.g. 26.5.0)' required: true type: string source-ref: diff --git a/nemo_retriever/helm/Chart.yaml b/nemo_retriever/helm/Chart.yaml index 1554e0bd4..4c7355d4b 100644 --- a/nemo_retriever/helm/Chart.yaml +++ b/nemo_retriever/helm/Chart.yaml @@ -18,8 +18,8 @@ description: | shared PostgreSQL backend so the service can scale horizontally. type: application -version: 26.05-RC1 -appVersion: "26.05-RC1" +version: "26.5.0" +appVersion: "26.5.0" kubeVersion: ">=1.25.0-0" home: https://github.com/NVIDIA/NeMo-Retriever sources: diff --git a/nemo_retriever/helm/values.yaml b/nemo_retriever/helm/values.yaml index 9fa45dff2..2486feb49 100644 --- a/nemo_retriever/helm/values.yaml +++ b/nemo_retriever/helm/values.yaml @@ -67,13 +67,13 @@ imagePullSecrets: [] # ============================================================================= service: image: - # Default points at the staging image published to NGC. Override + # Default points at the GA image published to NGC. Override # `repository` / `tag` to pin a different build, e.g. one produced by: - # docker build -f nemo_retriever/Dockerfile --target service \ + # docker build -f Dockerfile --target service \ # -t /nemo-retriever-service: . - repository: localhost:32000/nemo-retriever-service - tag: "latest" - pullPolicy: Always + repository: nvcr.io/nvidia/nemo-microservices/nrl-service + tag: "26.5.0" + pullPolicy: IfNotPresent # Number of pod replicas. Must stay at 1 while persistence is SQLite-backed # (RWO PVC + single writer). Bumping this requires switching to a shared diff --git a/nemo_retriever/pyproject.toml b/nemo_retriever/pyproject.toml index 355f23f58..577974ffc 100644 --- a/nemo_retriever/pyproject.toml +++ b/nemo_retriever/pyproject.toml @@ -52,7 +52,7 @@ dependencies = [ # HTTP clients "httpx>=0.27.0", "requests>=2.32.5", - "urllib3>=2.7.0", + "urllib3==2.7.0", # Utilities "pydantic>=2.8.0", "rich>=13.7.0", @@ -65,9 +65,9 @@ dependencies = [ # Document parsing and NIM client libs "pypdfium2==4.30.0", "pillow==12.2.0", - "nltk>=3.9.4", + "nltk==3.9.4", "markitdown", - "langchain-nvidia-ai-endpoints>=0.3.0", + "langchain-nvidia-ai-endpoints>=1.4.0", # Default VDB solution "lancedb", # gRPC client for Parakeet/Riva ASR. Required for ASRCPUActor when it @@ -123,11 +123,10 @@ local = [ "scikit-learn>=1.6.0", "timm==1.0.22", "albumentations==2.0.8", - "nemotron-page-elements-v3>=0.dev0", - "nemotron-graphic-elements-v1>=0.dev0", - "nemotron-table-structure-v1>=0.dev0", - # Accept the 2.0.0 stable release and newer OCR dev/final trains. - "nemotron-ocr>=2.0.0.dev0; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", + "nemotron-page-elements-v3==3.0.1", + "nemotron-graphic-elements-v1==1.0.0", + "nemotron-table-structure-v1==1.0.0", + "nemotron-ocr>=2.0.0,<3; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", "nvidia-ml-py", "apscheduler>=3.10", "psutil>=5.9.0", @@ -165,7 +164,7 @@ tabular = [ "duckdb>=1.2.0", "duckdb-engine>=0.13.0", "neo4j>=5.0", - "langgraph>=1.1.0a2", + "langgraph>=1.2.0", ] # BEIR benchmarking and evaluation tools (not needed for production use). @@ -181,7 +180,7 @@ benchmarks = [ # or construct an ``LLMJudge`` / ``LiteLLMClient`` directly. Powers both the # live-RAG SDK and the batch evaluation framework. llm = [ - "litellm>=1.86.0rc1", + "litellm>=1.86.0,<2", ] dev = [ @@ -202,10 +201,6 @@ retriever-harness = "nemo_retriever.harness:main" version = {attr = "nemo_retriever.version.get_build_version"} [tool.uv.sources] -nemotron-page-elements-v3 = { index = "test-pypi" } -nemotron-graphic-elements-v1 = { index = "test-pypi" } -nemotron-table-structure-v1 = { index = "test-pypi" } -nemotron-ocr = { index = "test-pypi" } # On Linux, resolve torch/torchvision from the CUDA wheel index. # On Mac, fall through to PyPI to get CPU wheels. torch = [ diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py index 5185bd2c5..b6428b2f8 100644 --- a/nemo_retriever/src/nemo_retriever/adapters/cli/main.py +++ b/nemo_retriever/src/nemo_retriever/adapters/cli/main.py @@ -167,9 +167,9 @@ def ingest_command( lancedb_uri: str = typer.Option(DEFAULT_LANCEDB_URI, "--lancedb-uri", help="LanceDB database URI."), table_name: str = typer.Option(DEFAULT_TABLE_NAME, "--table-name", help="LanceDB table name."), run_mode: IngestRunModeValue = typer.Option( - "batch", + "inprocess", "--run-mode", - help="Execution mode for the SDK ingestor. Defaults to batch; use inprocess to skip Ray for local debug/CI.", + help="Execution mode for the SDK ingestor. Defaults to inprocess; use batch for Ray Data scale-out.", ), dry_run: bool = typer.Option( False, @@ -557,8 +557,8 @@ def ingest_command( # Report input-file count alongside the actual landed-row count from the # LanceDB table — they diverge whenever one document explodes into multiple # chunks (PDFs → page elements, video → audio_visual segments) or - # shrinks to zero rows when every NIM call failed. The previous message - # only reported inputs and hid both cases. ``n_rows`` is None when the + # shrinks to zero rows when every NIM call failed. The SDK rejects empty + # or unverifiable ingests before we get here; ``n_rows`` is None when the # table read itself failed (caller can still see file count + URI). n_files = len(summary["documents"]) table_path = f"{summary['lancedb_uri']}/{summary['table_name']}" diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py index 6b3316580..0d227189c 100644 --- a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py +++ b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py @@ -505,7 +505,7 @@ def resolve_ingest_plan( *, profile: IngestProfileValue = "auto", input_type: IngestInputTypeValue = "auto", - run_mode: IngestRunModeValue = "batch", + run_mode: IngestRunModeValue = "inprocess", method: str | None = None, dpi: int | None = None, extract_text: bool | None = None, @@ -567,9 +567,8 @@ def resolve_ingest_plan( ) -> ResolvedIngestPlan: """Resolve root ingest options into ordinary params for one extract call. - Root ``retriever ingest`` intentionally defaults to ``run_mode="batch"``. - Programmatic callers that need Ray-free local execution should pass - ``run_mode="inprocess"`` explicitly. ``input_type`` remains a private + Root ``retriever ingest`` defaults to ``run_mode="inprocess"`` (no Ray). + Pass ``run_mode="batch"`` for Ray Data scale-out. ``input_type`` remains a private expansion/validation constraint; extraction still routes from the manifest. """ @@ -706,7 +705,7 @@ def ingest_documents( *, profile: IngestProfileValue = "auto", input_type: IngestInputTypeValue = "auto", - run_mode: IngestRunModeValue = "batch", + run_mode: IngestRunModeValue = "inprocess", dry_run: bool = False, method: str | None = None, dpi: int | None = None, @@ -778,9 +777,8 @@ def ingest_documents( Batch tuning arguments are opt-in and are translated into ``BatchTuningParams`` for extraction or embedding; they are meaningful for ``run_mode="batch"`` and ignored by callers that leave them unset. - Root ``retriever ingest`` intentionally defaults to ``run_mode="batch"``; - pass ``run_mode="inprocess"`` explicitly for local debug or CI callers - that need to skip Ray startup. + Root ``retriever ingest`` defaults to ``run_mode="inprocess"``; pass + ``run_mode="batch"`` for Ray Data scale-out. The legacy ``input_type`` argument constrains directory expansion and file validation only; extraction routing remains manifest-planned. """ diff --git a/nemo_retriever/src/nemo_retriever/graph/executor.py b/nemo_retriever/src/nemo_retriever/graph/executor.py index 3865e1b1b..cc0a74ec6 100644 --- a/nemo_retriever/src/nemo_retriever/graph/executor.py +++ b/nemo_retriever/src/nemo_retriever/graph/executor.py @@ -228,9 +228,8 @@ def build_dataset(self, data: Any, **kwargs: Any) -> Any: Returns ------- - pandas.DataFrame - The materialized result after executing the Ray Data pipeline - (``ds.to_pandas()``). + ray.data.Dataset + The lazy Ray dataset with all graph stages appended. """ import ray import ray.data as rd diff --git a/nemo_retriever/src/nemo_retriever/graph_ingestor.py b/nemo_retriever/src/nemo_retriever/graph_ingestor.py index 172eeef26..48f64967b 100644 --- a/nemo_retriever/src/nemo_retriever/graph_ingestor.py +++ b/nemo_retriever/src/nemo_retriever/graph_ingestor.py @@ -16,7 +16,7 @@ from nemo_retriever.params import ExtractParams, EmbedParams result_ds = ( - GraphIngestor(run_mode="batch") + GraphIngestor(run_mode="inprocess") .files(["/data/*.pdf"]) .extract(ExtractParams(method="pdfium")) .embed(EmbedParams(model_name="nvidia/llama-nemotron-embed-1b-v2")) @@ -387,8 +387,8 @@ class GraphIngestor(ingestor): Parameters ---------- run_mode - ``"batch"`` (Ray Data, default) or ``"inprocess"`` (single-process - pandas). + ``"inprocess"`` (single-process pandas, default) or ``"batch"`` (Ray + Data). ray_address Ray cluster address. ``None`` starts a local cluster. batch_size @@ -415,7 +415,7 @@ class GraphIngestor(ingestor): def __init__( self, *, - run_mode: str = "batch", + run_mode: str = "inprocess", documents: Optional[List[str]] = None, ray_address: Optional[str] = None, ray_log_to_driver: bool = True, diff --git a/nemo_retriever/src/nemo_retriever/harness/config.py b/nemo_retriever/src/nemo_retriever/harness/config.py index f9875364f..e506ccd04 100644 --- a/nemo_retriever/src/nemo_retriever/harness/config.py +++ b/nemo_retriever/src/nemo_retriever/harness/config.py @@ -73,7 +73,7 @@ class HarnessConfig: dataset_dir: str dataset_label: str preset: str - run_mode: str = "batch" + run_mode: str = "inprocess" query_csv: str | None = None input_type: str = "pdf" diff --git a/nemo_retriever/src/nemo_retriever/pipeline/__main__.py b/nemo_retriever/src/nemo_retriever/pipeline/__main__.py index 3d9ed7e98..f6a657ffc 100644 --- a/nemo_retriever/src/nemo_retriever/pipeline/__main__.py +++ b/nemo_retriever/src/nemo_retriever/pipeline/__main__.py @@ -8,15 +8,14 @@ Examples:: - # Batch mode (Ray) with PDF extraction + embedding + # In-process mode (default; no Ray) for local extraction + embedding retriever pipeline run /data/pdfs \\ - --run-mode batch \\ - --embed-invoke-url http://localhost:8000/v1 + --ocr-invoke-url http://localhost:9000/v1 - # In-process mode (no Ray) for quick local testing + # Batch mode (Ray) for large-scale throughput retriever pipeline run /data/pdfs \\ - --run-mode inprocess \\ - --ocr-invoke-url http://localhost:9000/v1 + --run-mode batch \\ + --embed-invoke-url http://localhost:8000/v1 # Service mode (delegate to a running retriever service) retriever pipeline run /data/pdfs \\ @@ -979,10 +978,10 @@ def run( ), # --- I/O and execution ------------------------------------------------ run_mode: str = typer.Option( - "batch", + "inprocess", "--run-mode", help=( - "Execution mode: 'batch' (Ray Data), 'inprocess' (pandas, no Ray), " + "Execution mode: 'inprocess' (pandas, no Ray, default), 'batch' (Ray Data), " "or 'service' (remote retriever service)." ), rich_help_panel=_PANEL_IO, diff --git a/nemo_retriever/src/nemo_retriever/service/app.py b/nemo_retriever/src/nemo_retriever/service/app.py index f7cef45b1..d10d89173 100644 --- a/nemo_retriever/src/nemo_retriever/service/app.py +++ b/nemo_retriever/src/nemo_retriever/service/app.py @@ -247,7 +247,7 @@ def create_app(config: ServiceConfig) -> FastAPI: app = FastAPI( title="Retriever Service", description="Low-latency document ingestion service powered by nemo-retriever", - version="1.0.0", + version="26.5.0", docs_url="/docs", lifespan=_lifespan, ) diff --git a/nemo_retriever/src/nemo_retriever/service/client.py b/nemo_retriever/src/nemo_retriever/service/client.py index 39df20a9d..e2e7a1053 100644 --- a/nemo_retriever/src/nemo_retriever/service/client.py +++ b/nemo_retriever/src/nemo_retriever/service/client.py @@ -216,6 +216,7 @@ async def _create_job( *, expected_documents: int, label: str | None = None, + retain_results: bool = False, ) -> str: """Open a server-side job aggregate and return the assigned ``job_id``. @@ -224,7 +225,10 @@ async def _create_job( call sized to the number of files supplied. """ url = f"{self._base_url}/v1/ingest/job" - payload: dict[str, Any] = {"expected_documents": expected_documents} + payload: dict[str, Any] = { + "expected_documents": expected_documents, + "retain_results": retain_results, + } if label is not None: payload["label"] = label resp = await client.post(url, json=payload) @@ -639,6 +643,7 @@ async def aingest_documents_stream( files: list[Path], *, pipeline_spec: dict[str, Any] | None = None, + retain_results: bool = False, ) -> AsyncIterator[dict[str, Any]]: """Async generator: upload files, yield events as documents complete. @@ -665,7 +670,11 @@ async def aingest_documents_stream( limits=pool_limits, headers=self._auth_headers, ) as client: - job_id = await self._create_job(client, expected_documents=len(files)) + job_id = await self._create_job( + client, + expected_documents=len(files), + retain_results=retain_results, + ) yield { "event": "job_created", "job_id": job_id, diff --git a/nemo_retriever/src/nemo_retriever/service/models/requests.py b/nemo_retriever/src/nemo_retriever/service/models/requests.py index 4bc0f42bd..91270d496 100644 --- a/nemo_retriever/src/nemo_retriever/service/models/requests.py +++ b/nemo_retriever/src/nemo_retriever/service/models/requests.py @@ -46,3 +46,12 @@ class JobCreateRequest(RichModel): expected_documents: int = Field(ge=1, description="Number of documents this job will receive") label: str | None = Field(default=None, description="Optional human-readable tag for the dashboard") metadata: dict[str, Any] = Field(default_factory=dict) + retain_results: bool = Field( + default=False, + description=( + "When false (default), completed documents keep only ``result_rows`` in the " + "job tracker; row payloads are discarded after the pipeline finishes. Set true " + "when the client will poll ``GET /v1/ingest/status/{id}`` to fetch " + "``result_data``." + ), + ) diff --git a/nemo_retriever/src/nemo_retriever/service/routers/ingest.py b/nemo_retriever/src/nemo_retriever/service/routers/ingest.py index 63117ba58..d9a8ed590 100644 --- a/nemo_retriever/src/nemo_retriever/service/routers/ingest.py +++ b/nemo_retriever/src/nemo_retriever/service/routers/ingest.py @@ -71,6 +71,7 @@ _GATEWAY_CALLBACK_HEADER = "X-Gateway-Callback-Url" _GATEWAY_PIPELINE_SPEC_HEADER = "X-Gateway-Pipeline-Spec" _GATEWAY_JOB_ID_HEADER = "X-Gateway-Job-Id" +_GATEWAY_RETAIN_RESULTS_HEADER = "X-Gateway-Retain-Results" _PAGE_THRESHOLD_FOR_BATCH = 5 # SSE keepalive cadence; tests monkey-patch this to a short value so @@ -122,6 +123,33 @@ def _is_worker(request: Request) -> bool: return _mode(request) in ("realtime", "batch") +def _retain_results_from_request(request: Request) -> bool: + val = request.headers.get(_GATEWAY_RETAIN_RESULTS_HEADER, "").strip().lower() + return val in ("1", "true", "yes") + + +def _job_retain_results(job_id: str | None) -> bool: + if not job_id: + return False + tracker = get_job_tracker() + if tracker is None: + return False + return tracker.should_retain_results(job_id) + + +def _work_item_retain_results(request: Request, *, job_id: str | None) -> bool: + """Whether the worker pool should cache row payloads for this upload.""" + if request.headers.get(_GATEWAY_DOC_ID_HEADER): + return _retain_results_from_request(request) + return _job_retain_results(job_id) + + +def _gateway_retain_results_headers(job_id: str) -> dict[str, str]: + if _job_retain_results(job_id): + return {_GATEWAY_RETAIN_RESULTS_HEADER: "true"} + return {} + + def _record_prometheus( request: Request, endpoint: str, @@ -474,6 +502,7 @@ async def create_job(request: Request, body: JobCreateRequest) -> JobCreatedResp expected_documents=body.expected_documents, label=body.label, metadata=body.metadata, + retain_results=body.retain_results, ) except JobTrackerError as exc: raise HTTPException(status_code=getattr(exc, "status_code", 500), detail=str(exc)) from exc @@ -704,6 +733,7 @@ async def submit_document_to_job( _GATEWAY_DOC_ID_HEADER: document_id, _GATEWAY_JOB_ID_HEADER: job_id, _GATEWAY_CALLBACK_HEADER: callback_url, + **_gateway_retain_results_headers(job_id), } if validated_spec is not None: extra_headers[_GATEWAY_PIPELINE_SPEC_HEADER] = validated_spec.model_dump_json() @@ -763,6 +793,7 @@ async def submit_document_to_job( callback_url=gw_callback_url, job_id=gw_job_id, pipeline_spec=worker_spec.model_dump(mode="json") if worker_spec is not None else None, + retain_results=_work_item_retain_results(request, job_id=gw_job_id), ), ) @@ -831,6 +862,7 @@ async def submit_page_to_job( _GATEWAY_DOC_ID_HEADER: page_id, _GATEWAY_JOB_ID_HEADER: job_id, _GATEWAY_CALLBACK_HEADER: callback_url, + **_gateway_retain_results_headers(job_id), }, ) @@ -892,6 +924,7 @@ async def submit_page_to_job( filename=file.filename, callback_url=gw_callback_url, job_id=gw_job_id, + retain_results=_work_item_retain_results(request, job_id=gw_job_id), ), ) @@ -963,6 +996,7 @@ async def submit_whole_document_to_job( _GATEWAY_DOC_ID_HEADER: document_id, _GATEWAY_JOB_ID_HEADER: job_id, _GATEWAY_CALLBACK_HEADER: callback_url, + **_gateway_retain_results_headers(job_id), } if validated_spec is not None: extra_headers[_GATEWAY_PIPELINE_SPEC_HEADER] = validated_spec.model_dump_json() @@ -1023,6 +1057,7 @@ async def submit_whole_document_to_job( callback_url=gw_callback_url, job_id=gw_job_id, pipeline_spec=worker_spec.model_dump(mode="json") if worker_spec is not None else None, + retain_results=_work_item_retain_results(request, job_id=gw_job_id), ), ) diff --git a/nemo_retriever/src/nemo_retriever/service/services/job_tracker.py b/nemo_retriever/src/nemo_retriever/service/services/job_tracker.py index 7a49c8c4b..844779edb 100644 --- a/nemo_retriever/src/nemo_retriever/service/services/job_tracker.py +++ b/nemo_retriever/src/nemo_retriever/service/services/job_tracker.py @@ -174,6 +174,8 @@ class JobAggregate(RichModel): label: str | None = None """Optional client-supplied tag, e.g. ``"Q4-2026-corpus"``.""" metadata: dict[str, Any] = {} + retain_results: bool = False + """When false, :meth:`JobTracker.mark_completed` drops bulky ``result_data``.""" # ── eviction tunables (apply to terminal aggregates) ────────────────── @@ -273,6 +275,7 @@ def register_job( expected_documents: int, label: str | None = None, metadata: dict[str, Any] | None = None, + retain_results: bool = False, ) -> JobAggregate: """Create a new :class:`JobAggregate` in ``pending`` state.""" if expected_documents <= 0: @@ -295,6 +298,7 @@ def register_job( created_at=_utcnow_iso(), label=label, metadata=dict(metadata or {}), + retain_results=retain_results, ) agg.counts[DocumentStatus.PENDING.value] = 0 self._jobs[job_id] = agg @@ -317,6 +321,14 @@ def all_jobs(self) -> list[JobAggregate]: with self._lock: return [a.model_copy(deep=True) for a in self._jobs.values()] + def should_retain_results(self, job_id: str | None) -> bool: + """Return whether completed row payloads should be kept for *job_id*.""" + if not job_id: + return False + with self._lock: + agg = self._jobs.get(job_id) + return bool(agg.retain_results) if agg is not None else False + def job_documents(self, job_id: str) -> list[DocumentRecord]: """Return every document record belonging to *job_id* in arrival order.""" with self._lock: @@ -492,7 +504,9 @@ def _mark_terminal( rec.status = new_status rec.completed_at = _utcnow_iso() rec.result_rows = result_rows - rec.result_data = result_data + agg_for_retain = self._jobs.get(rec.job_id) + retain_results = bool(agg_for_retain.retain_results) if agg_for_retain is not None else False + rec.result_data = result_data if retain_results else None rec.error = error if elapsed_s is not None: rec.elapsed_s = elapsed_s diff --git a/nemo_retriever/src/nemo_retriever/service/services/pipeline_pool.py b/nemo_retriever/src/nemo_retriever/service/services/pipeline_pool.py index 22a6189a8..c097233be 100644 --- a/nemo_retriever/src/nemo_retriever/service/services/pipeline_pool.py +++ b/nemo_retriever/src/nemo_retriever/service/services/pipeline_pool.py @@ -68,6 +68,7 @@ class WorkItem(RichModel): # Owning job aggregate (J1+). Always set today since the only # admission path is /v1/ingest/job/{job_id}/document. job_id: str | None = None + retain_results: bool = False # Validated per-request pipeline overrides (PipelineSpec serialised # to a dict). ``None`` means: run the legacy startup-baked pipeline. pipeline_spec: dict[str, Any] | None = None @@ -267,10 +268,17 @@ async def _worker_loop(self, worker_id: int) -> None: elif isinstance(result, int): result_rows = result + retain_results = item.retain_results + if not retain_results and item.job_id: + tracker_lookup = get_job_tracker() + if tracker_lookup is not None: + retain_results = tracker_lookup.should_retain_results(item.job_id) + if item.callback_url: - from nemo_retriever.service.services.worker_result_store import store_result_data + if retain_results: + from nemo_retriever.service.services.worker_result_store import store_result_data - store_result_data(item.id, result_data) + store_result_data(item.id, result_data) await _fire_gateway_callback( item.callback_url, item.id, @@ -281,7 +289,7 @@ async def _worker_loop(self, worker_id: int) -> None: tracker.mark_completed( item.id, result_rows=result_rows, - result_data=result_data, + result_data=result_data if retain_results else None, ) self._processed += 1 except Exception as exc: diff --git a/nemo_retriever/src/nemo_retriever/service_ingestor.py b/nemo_retriever/src/nemo_retriever/service_ingestor.py index 1c09b8d90..08a1ff9d9 100644 --- a/nemo_retriever/src/nemo_retriever/service_ingestor.py +++ b/nemo_retriever/src/nemo_retriever/service_ingestor.py @@ -1065,6 +1065,7 @@ def ingest(self, params: Any = None, **kwargs: Any) -> Any: """ return_failures, return_traces, return_results = self._resolve_execute_flags(params, kwargs) del params, kwargs + retain_results = return_results or self._save_to_disk_dir is not None result = ServiceIngestResult() traces: list[dict[str, Any]] = [] rows_by_document: dict[str, list[dict[str, Any]]] = {} @@ -1074,7 +1075,7 @@ def ingest(self, params: Any = None, **kwargs: Any) -> Any: documents_failed = 0 total_uploaded = 0 - for evt in self.ingest_stream(): + for evt in self.ingest_stream(retain_results=retain_results): if return_traces: traces.append(evt) event_type = evt.get("event") @@ -1210,7 +1211,7 @@ def _from_params(name: str, *, default: bool) -> bool: # Execution — sync streaming # ------------------------------------------------------------------ - def ingest_stream(self) -> Iterator[dict[str, Any]]: + def ingest_stream(self, *, retain_results: bool = False) -> Iterator[dict[str, Any]]: """Sync generator yielding events as documents are processed. Yields dicts with: @@ -1222,49 +1223,58 @@ def ingest_stream(self) -> Iterator[dict[str, Any]]: * ``{"event": "job_progress", "job_id": ..., "completed": ..., "failed": ..., ...}`` * ``{"event": "job_finalized"|"job_partial"|"job_failed", "job_id": ..., ...}`` """ + return self._ingest_stream_with_retain(retain_results) + + # ------------------------------------------------------------------ + # Execution — async streaming + # ------------------------------------------------------------------ + + async def aingest_stream(self, *, retain_results: bool = False) -> AsyncIterator[dict[str, Any]]: + """Async generator yielding events as documents are processed.""" files = self._collect_inputs() if not files: - return iter(()) + return self._document_ids.clear() - - def _record_doc_id(evt: dict[str, Any]) -> None: + async for evt in self._aingest_stream_impl(files, retain_results=retain_results): if evt.get("event") == "upload_complete": did = evt.get("document_id") if did: self._document_ids.append(did) - - def _factory(): - return self._wrap_for_capture(self._aingest_stream_impl(files), _record_doc_id) - - bridge = _AsyncToSyncBridge(_factory) - return iter(bridge) + yield evt # ------------------------------------------------------------------ - # Execution — async streaming + # Async helper used by both sync and async streaming entry points # ------------------------------------------------------------------ - async def aingest_stream(self) -> AsyncIterator[dict[str, Any]]: - """Async generator yielding events as documents are processed.""" + def _ingest_stream_with_retain(self, retain_results: bool) -> Iterator[dict[str, Any]]: + """Like :meth:`ingest_stream` but passes server-side retention to the HTTP client.""" files = self._collect_inputs() if not files: - return + return iter(()) self._document_ids.clear() - async for evt in self._aingest_stream_impl(files): + + def _record_doc_id(evt: dict[str, Any]) -> None: if evt.get("event") == "upload_complete": did = evt.get("document_id") if did: self._document_ids.append(did) - yield evt - # ------------------------------------------------------------------ - # Async helper used by both sync and async streaming entry points - # ------------------------------------------------------------------ + def _factory(): + return self._wrap_for_capture( + self._aingest_stream_impl(files, retain_results=retain_results), + _record_doc_id, + ) + + bridge = _AsyncToSyncBridge(_factory) + return iter(bridge) async def _aingest_stream_impl( self, files: list[Path], + *, + retain_results: bool = False, ) -> AsyncIterator[dict[str, Any]]: from nemo_retriever.service.client import RetrieverServiceClient @@ -1274,7 +1284,11 @@ async def _aingest_stream_impl( api_token=self._api_token, ) pipeline_payload = self._pipeline_payload() - async for evt in client.aingest_documents_stream(files=files, pipeline_spec=pipeline_payload): + async for evt in client.aingest_documents_stream( + files=files, + pipeline_spec=pipeline_payload, + retain_results=retain_results, + ): yield evt @staticmethod diff --git a/nemo_retriever/tests/test_harness_run.py b/nemo_retriever/tests/test_harness_run.py index afc212e19..c9eae0fd0 100644 --- a/nemo_retriever/tests/test_harness_run.py +++ b/nemo_retriever/tests/test_harness_run.py @@ -171,7 +171,7 @@ def test_build_command_uses_hidden_detection_file_by_default(tmp_path: Path) -> ) cmd, runtime_dir, detection_file, effective_query_csv = _build_command(cfg, tmp_path, run_id="r1") assert "--run-mode" in cmd - assert cmd[cmd.index("--run-mode") + 1] == "batch" + assert cmd[cmd.index("--run-mode") + 1] == "inprocess" assert "--detection-summary-file" in cmd assert "--evaluation-mode" in cmd assert cmd[cmd.index("--evaluation-mode") + 1] == "beir" @@ -1237,7 +1237,7 @@ def _fake_run_subprocess(_cmd: list[str], env_extra: dict[str, str] | None = Non "dataset_label": "jp20", "dataset_dir": str(dataset_dir), "preset": "single_gpu", - "run_mode": "batch", + "run_mode": "inprocess", "query_csv": str(query_csv), "effective_query_csv": str(query_csv), "input_type": cfg.input_type, diff --git a/nemo_retriever/tests/test_ingest_manifest.py b/nemo_retriever/tests/test_ingest_manifest.py index 444b9f328..eca890394 100644 --- a/nemo_retriever/tests/test_ingest_manifest.py +++ b/nemo_retriever/tests/test_ingest_manifest.py @@ -154,7 +154,7 @@ def test_ingest_plan_auto_profile_preserves_manifest_defaults(tmp_path) -> None: assert plan.extract_params.extract_charts is True assert plan.extract_params.extract_infographics is True assert plan.extract_params.use_page_elements is True - assert plan.create_kwargs == {"run_mode": "batch"} + assert plan.create_kwargs == {"run_mode": "inprocess"} def test_ingest_plan_fast_text_profile_is_pdf_text_only(tmp_path) -> None: diff --git a/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py b/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py index 8a8f75467..fd41e6bb1 100644 --- a/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py +++ b/nemo_retriever/tests/test_nemotron_ocr_v2_nightly.py @@ -75,9 +75,8 @@ def test_local_extra_accepts_stable_ocr_2_and_newer_dev_releases() -> None: ocr_dep = next(dep for dep in local_deps if dep.startswith("nemotron-ocr")) ocr_requirement = Requirement(ocr_dep) - assert str(ocr_requirement.specifier) == ">=2.0.0.dev0" assert ocr_requirement.specifier.contains("2.0.0") - assert ocr_requirement.specifier.contains("2.0.1.dev20260521010101") + assert not ocr_requirement.specifier.contains("3.0.0") assert ocr_requirement.specifier.contains("2.0.1") assert not ocr_requirement.specifier.contains("1.0.1") assert str(ocr_requirement.marker) == ( @@ -86,7 +85,7 @@ def test_local_extra_accepts_stable_ocr_2_and_newer_dev_releases() -> None: assert not any(dep.startswith("nemotron-ocr-v2") for dep in local_deps) assert "nemotron-ocr" in uv_tool["no-build-package"] assert "nemotron-ocr-v2" not in uv_tool["no-build-package"] - assert uv_sources["nemotron-ocr"] == {"index": "test-pypi"} + assert "nemotron-ocr" not in uv_sources assert "nemotron-ocr-v2" not in uv_sources diff --git a/nemo_retriever/tests/test_root_cli_workflow.py b/nemo_retriever/tests/test_root_cli_workflow.py index 65d82f633..55f806727 100644 --- a/nemo_retriever/tests/test_root_cli_workflow.py +++ b/nemo_retriever/tests/test_root_cli_workflow.py @@ -71,7 +71,7 @@ def fake_create_ingestor(**kwargs: Any) -> Any: result = RUNNER.invoke(cli_main.app, ["ingest", str(document)]) assert result.exit_code == 0 - assert create_calls == [{"run_mode": "batch"}] + assert create_calls == [{"run_mode": "inprocess"}] assert [method_call[0] for method_call in fake_ingestor.method_calls] == [ "files", "extract", @@ -425,8 +425,8 @@ def test_root_ingest_help_does_not_expose_input_type() -> None: assert "[auto|fast-text]" in result.output assert "--extract-images" in result.output assert "--caption" in result.output - assert "Defaults to" in result.output - assert "[default: batch]" in result.output + assert "--run-mode" in result.output + assert "[inprocess|batch" in result.output assert re.search(r"--no-caption(?!-)", result.output) is None @@ -445,7 +445,7 @@ def fail_create_ingestor(**_kwargs: Any) -> Any: payload = json.loads(result.output) assert payload["dry_run"] is True assert payload["profile"] == "fast-text" - assert payload["create_ingestor"] == {"run_mode": "batch"} + assert payload["create_ingestor"] == {"run_mode": "inprocess"} assert payload["extract"]["method"] == "pdfium" assert payload["extract"]["extract_images"] is False assert payload["extract"]["use_page_elements"] is False diff --git a/nemo_retriever/tests/test_service_ingest_async.py b/nemo_retriever/tests/test_service_ingest_async.py index 8378b2c4b..9b4e2070c 100644 --- a/nemo_retriever/tests/test_service_ingest_async.py +++ b/nemo_retriever/tests/test_service_ingest_async.py @@ -78,7 +78,8 @@ def stub_ingestor() -> Iterator[ServiceIngestor]: ing = ServiceIngestor(base_url="http://example:7670") events = _stub_event_sequence() - def _fake_stream(self: ServiceIngestor) -> Iterator[dict[str, Any]]: + def _fake_stream(self: ServiceIngestor, *, retain_results: bool = False) -> Iterator[dict[str, Any]]: + _ = retain_results return iter(events) with ( @@ -94,7 +95,7 @@ def _fake_stream(self: ServiceIngestor) -> Iterator[dict[str, Any]]: def test_ingest_default_returns_service_ingest_result(stub_ingestor: ServiceIngestor) -> None: - """Backward-compat: no flags ⇒ same ServiceIngestResult as before.""" + """Default flags ⇒ ServiceIngestResult with fetched row payloads.""" result = stub_ingestor.ingest() assert isinstance(result, ServiceIngestResult) assert not isinstance(result, tuple) diff --git a/nemo_retriever/tests/test_service_ingest_router.py b/nemo_retriever/tests/test_service_ingest_router.py index f5d42dada..28ff7932e 100644 --- a/nemo_retriever/tests/test_service_ingest_router.py +++ b/nemo_retriever/tests/test_service_ingest_router.py @@ -205,6 +205,22 @@ def test_create_job_returns_201_and_aggregate_fields(app_with_stub_pool: TestCli assert body["job_id"] +def test_create_job_retain_results_persisted_on_aggregate(app_with_stub_pool: TestClient) -> None: + from nemo_retriever.service.services.job_tracker import get_job_tracker + + resp = app_with_stub_pool.post( + "/v1/ingest/job", + json={"expected_documents": 1, "retain_results": True}, + ) + assert resp.status_code == 201, resp.text + job_id = resp.json()["job_id"] + tracker = get_job_tracker() + assert tracker is not None + agg = tracker.get_job(job_id) + assert agg is not None + assert agg.retain_results is True + + def test_get_job_returns_aggregate_snapshot(app_with_stub_pool: TestClient) -> None: job_id = create_test_job(app_with_stub_pool, expected_documents=2) resp = app_with_stub_pool.get(f"/v1/ingest/job/{job_id}") diff --git a/nemo_retriever/tests/test_service_job_tracker.py b/nemo_retriever/tests/test_service_job_tracker.py index 5ec45ac97..a5dd1f15a 100644 --- a/nemo_retriever/tests/test_service_job_tracker.py +++ b/nemo_retriever/tests/test_service_job_tracker.py @@ -204,7 +204,7 @@ def test_mark_processing_is_idempotent() -> None: def test_mark_completed_updates_counts_and_doc_record() -> None: tracker, _bus = _make_tracker_with_bus() - tracker.register_job("j", expected_documents=2) + tracker.register_job("j", expected_documents=2, retain_results=True) tracker.register_document("a", job_id="j") tracker.register_document("b", job_id="j") tracker.mark_processing("a") @@ -214,6 +214,7 @@ def test_mark_completed_updates_counts_and_doc_record() -> None: assert rec is not None assert rec.status == DocumentStatus.COMPLETED assert rec.result_rows == 42 + assert rec.result_data == [{"k": "v"}] assert rec.completed_at is not None agg = tracker.get_job("j") @@ -528,9 +529,21 @@ def test_summary_groups_by_job_aggregate_status() -> None: assert summary[JobAggregateStatus.FAILED.value] == 1 +def test_mark_completed_drops_result_data_when_retain_false() -> None: + tracker = JobTracker() + tracker.register_job("j", expected_documents=1, retain_results=False) + tracker.register_document("d", job_id="j") + tracker.mark_completed("d", result_rows=3, result_data=[{"x": 1}]) + rec = tracker.get_document("d") + assert rec is not None + assert rec.result_rows == 3 + assert rec.result_data is None + assert tracker.consume_result_data("d") is None + + def test_consume_result_data_clears_after_read() -> None: tracker = JobTracker() - tracker.register_job("j", expected_documents=1) + tracker.register_job("j", expected_documents=1, retain_results=True) tracker.register_document("d", job_id="j") tracker.mark_completed("d", result_data=[{"x": 1}]) assert tracker.consume_result_data("d") == [{"x": 1}] diff --git a/nemo_retriever/uv.lock b/nemo_retriever/uv.lock index aa7ab1fb8..0090d174e 100644 --- a/nemo_retriever/uv.lock +++ b/nemo_retriever/uv.lock @@ -1871,7 +1871,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.3.3" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -1884,14 +1884,14 @@ dependencies = [ { name = "typing-extensions" }, { name = "uuid-utils" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d3/ae/8b74458fc3850ec3d150eb9f45e857db129dafa801fb5cf173dfc9f8bbf3/langchain_core-1.3.3.tar.gz", hash = "sha256:fa510a5db8efdc0c6ff41c0939fb5c00a0183c11f6b84233e892e3227ff69182", size = 915041, upload-time = "2026-05-05T19:02:36.612Z" } +sdist = { url = "https://files.pythonhosted.org/packages/59/de/679a53472c25860837e32c0442c962fa86e95317a36460e2c9d5c91b17c2/langchain_core-1.4.0.tar.gz", hash = "sha256:1dc341eed802ed9c117c0df3923c991e5e9e226571e5725c194eeb5bd93d1a7f", size = 920260, upload-time = "2026-05-11T18:42:35.919Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/01/4771b7ab2af1d1aba5b710bd8f13d9225c609425214b357590a17b01be77/langchain_core-1.3.3-py3-none-any.whl", hash = "sha256:18aae8506f37da7f74398492279a7d6efcee4f8e23c4c41c7af080eeb7ef7bd1", size = 543857, upload-time = "2026-05-05T19:02:34.52Z" }, + { url = "https://files.pythonhosted.org/packages/0f/1a/86c38c27b81913a1c6c12448cab55defb5a1097c7dc9a4cea83f55477a2d/langchain_core-1.4.0-py3-none-any.whl", hash = "sha256:23cbbdb46e38ddd1dd5247e6167e96013eae74bea4c5949c550809970a9e565c", size = 548120, upload-time = "2026-05-11T18:42:33.992Z" }, ] [[package]] name = "langchain-nvidia-ai-endpoints" -version = "1.3.0" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1899,9 +1899,9 @@ dependencies = [ { name = "langchain-core" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c5/2f/29036df9a99212f27369a123d2b44b5eec0ffb1b15b1277bf71cc0a37606/langchain_nvidia_ai_endpoints-1.3.0.tar.gz", hash = "sha256:5223aa7988ee5044f38715ae757faa0af4ba64f2ed0c82851a99c052592eaa09", size = 58015, upload-time = "2026-05-07T23:06:33.579Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/30/4acdd906ab2c5da2066d5951ee4fd60fc3a070395c4179b958d7945c543a/langchain_nvidia_ai_endpoints-1.4.0.tar.gz", hash = "sha256:dc43f907c32f5ce559718be1f80789ab84c570fff0e7ee1a50aa71f0424b574b", size = 58038, upload-time = "2026-05-21T03:45:22.316Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/34/dd21237e0534938061207ee733ef6da6c2dc62c9712932b379714817abc9/langchain_nvidia_ai_endpoints-1.3.0-py3-none-any.whl", hash = "sha256:cc2b356e96e86ffb92dcfe83980aa73227e1fad8f3a4cbdd76cdcf980c42e7cc", size = 63126, upload-time = "2026-05-07T23:06:32.585Z" }, + { url = "https://files.pythonhosted.org/packages/15/fa/f1aeaff47e6e98dde9f8c3e1b63607f97d4e0d6f2df6d52ee18b399bb5e2/langchain_nvidia_ai_endpoints-1.4.0-py3-none-any.whl", hash = "sha256:9557eda9d794373a601afbb9a74d15d650f0c8543d544d81f984bfc89b82d52f", size = 63146, upload-time = "2026-05-21T03:45:21.35Z" }, ] [[package]] @@ -1918,7 +1918,7 @@ wheels = [ [[package]] name = "langgraph" -version = "1.1.10" +version = "1.2.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, @@ -1928,35 +1928,35 @@ dependencies = [ { name = "pydantic" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/b3/7dec224369c7938eb3227ff69542a0d0f517862a0d27945b8c395f2a781f/langgraph-1.1.10.tar.gz", hash = "sha256:3115beb58203283c98d8752a90c034f3432177d2979a1fe205f76e5f1b744500", size = 560685, upload-time = "2026-04-27T17:19:10.426Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/5a/ffc12434ee8aecab830d58b4d204ddea45073eae7639c963310f671a5bf5/langgraph-1.2.2.tar.gz", hash = "sha256:f54a98458976b3ff0774683867df125fb52d8dbedeb2441d0b0656a51331cee5", size = 695730, upload-time = "2026-05-26T18:07:28.49Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/07/057dc1aa7991115fca53f1fa6573a7cc0dd296c05360c672cc67fdb6245b/langgraph-1.1.10-py3-none-any.whl", hash = "sha256:8a4f163f72f4401648d0c11b48ee906947d938ba8cf1f474540fe591534f0d17", size = 173750, upload-time = "2026-04-27T17:19:09.073Z" }, + { url = "https://files.pythonhosted.org/packages/42/9b/b08d578bba73e25351152dfd3d6d21e81210a5fff1b6f26e56f33197c8f5/langgraph-1.2.2-py3-none-any.whl", hash = "sha256:0a851bf4ba5939c5474a2fd57e6b439b5315283e254e42943bd392c2d71a5e03", size = 236376, upload-time = "2026-05-26T18:07:26.577Z" }, ] [[package]] name = "langgraph-checkpoint" -version = "4.0.3" +version = "4.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "ormsgpack" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7c/e1/885e49cdafceb4c74dae4573bc5dd6054c6c640382ee73104532f33dca46/langgraph_checkpoint-4.0.3.tar.gz", hash = "sha256:a7b5e2ca18fb79b55edf19396d4ee446f8a53dcb7a4ec62ce6f1c7e00bb5af7f", size = 174009, upload-time = "2026-04-27T14:34:02.777Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/47/886af6f886f0bff2273164a45f008694e48a96ff3cd25ff0228f2aa9480e/langgraph_checkpoint-4.1.1.tar.gz", hash = "sha256:6c2bdb530c91f91d7d9c1bd100925d0fc4f498d418c17f3587d1526279482a25", size = 184020, upload-time = "2026-05-22T16:57:38.503Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/ee/ecd3fa2e893746dde3b768daca2a4935208bc77d09445437ccfffb4a8c9b/langgraph_checkpoint-4.0.3-py3-none-any.whl", hash = "sha256:b91b765712a2311a5b198760f714b7ab9b376d01c047ed78d9b9a3e80df802a3", size = 51682, upload-time = "2026-04-27T14:34:01.51Z" }, + { url = "https://files.pythonhosted.org/packages/bd/b4/71425e3e38be92611300b9cc5e46a5bf98ab23f5ea8a75b73d02a2f1413c/langgraph_checkpoint-4.1.1-py3-none-any.whl", hash = "sha256:25d29144b082827218e7bc3f1e9b0566a4bb007895cd6cc26f66a8428739f56e", size = 56212, upload-time = "2026-05-22T16:57:37.203Z" }, ] [[package]] name = "langgraph-prebuilt" -version = "1.0.13" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph-checkpoint" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/a4/f8ac75fa7c503103f0cf7680944e28bbaaef74c19a8d163d7346869cc369/langgraph_prebuilt-1.0.13.tar.gz", hash = "sha256:ad219782a80e1718e7e7794de49e0ae307111d45cbcffab9a52725a66a609456", size = 172913, upload-time = "2026-04-30T01:48:15.742Z" } +sdist = { url = "https://files.pythonhosted.org/packages/29/66/ed9b93f56bc17ef22d551892f0ac2b225a97fe0fcf23a511b857f70d590b/langgraph_prebuilt-1.1.0.tar.gz", hash = "sha256:3c579cf6eed2d17f9c157c2d0fcaddcd8688524e7022d3b22b37a3bf4589d528", size = 178833, upload-time = "2026-05-12T03:37:49.332Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/ef/5ada0bef4013ef5ae53a0ca1de5736517f1076a54d313f156ca545ec65d5/langgraph_prebuilt-1.0.13-py3-none-any.whl", hash = "sha256:7055e9fad41fbd3593800aed0aea0a6e974b17f33ed51b80d3d3a031212dd7c0", size = 37214, upload-time = "2026-04-30T01:48:14.507Z" }, + { url = "https://files.pythonhosted.org/packages/e9/43/3fe1a700b8490ed02679cdbbc8c915eb23a092faf496c9c1118abcd10be3/langgraph_prebuilt-1.1.0-py3-none-any.whl", hash = "sha256:51e311747d755b751d5c6b39b0c1446124d3a7643d2515017e6714b323508fc9", size = 41043, upload-time = "2026-05-12T03:37:48.007Z" }, ] [[package]] @@ -2490,7 +2490,6 @@ all = [ { name = "nemotron-table-structure-v1" }, { name = "neo4j" }, { name = "nvidia-ml-py" }, - { name = "nvidia-riva-client" }, { name = "open-clip-torch" }, { name = "opencv-python-headless" }, { name = "psutil" }, @@ -2602,19 +2601,19 @@ requires-dist = [ { name = "glom", marker = "extra == 'service'" }, { name = "httpx", specifier = ">=0.27.0" }, { name = "lancedb" }, - { name = "langchain-nvidia-ai-endpoints", specifier = ">=0.3.0" }, - { name = "langgraph", marker = "extra == 'tabular'", specifier = ">=1.1.0a2" }, + { name = "langchain-nvidia-ai-endpoints", specifier = ">=1.4.0" }, + { name = "langgraph", marker = "extra == 'tabular'", specifier = ">=1.2.0" }, { name = "librosa", marker = "extra == 'multimedia'", specifier = ">=0.10.2" }, { name = "librosa", marker = "extra == 'service'", specifier = ">=0.10.2" }, - { name = "litellm", marker = "extra == 'llm'", specifier = ">=1.86.0rc1" }, + { name = "litellm", marker = "extra == 'llm'", specifier = ">=1.86.0,<2" }, { name = "markitdown" }, { name = "nemo-retriever", extras = ["benchmarks", "llm", "local", "multimedia", "nemotron-parse", "service", "tabular"], marker = "extra == 'all'" }, - { name = "nemotron-graphic-elements-v1", marker = "extra == 'local'", specifier = ">=0.dev0", index = "https://test.pypi.org/simple/" }, - { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'local') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'local')", specifier = ">=2.0.0.dev0", index = "https://test.pypi.org/simple/" }, - { name = "nemotron-page-elements-v3", marker = "extra == 'local'", specifier = ">=0.dev0", index = "https://test.pypi.org/simple/" }, - { name = "nemotron-table-structure-v1", marker = "extra == 'local'", specifier = ">=0.dev0", index = "https://test.pypi.org/simple/" }, + { name = "nemotron-graphic-elements-v1", marker = "extra == 'local'", specifier = "==1.0.0" }, + { name = "nemotron-ocr", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'local') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'local')", specifier = ">=2.0.0,<3" }, + { name = "nemotron-page-elements-v3", marker = "extra == 'local'", specifier = "==3.0.1" }, + { name = "nemotron-table-structure-v1", marker = "extra == 'local'", specifier = "==1.0.0" }, { name = "neo4j", marker = "extra == 'tabular'", specifier = ">=5.0" }, - { name = "nltk", specifier = ">=3.9.4" }, + { name = "nltk", specifier = "==3.9.4" }, { name = "numpy", specifier = ">=1.26.0" }, { name = "nvidia-ml-py", marker = "extra == 'local'" }, { name = "nvidia-riva-client", specifier = ">=2.25.1" }, @@ -2653,7 +2652,7 @@ requires-dist = [ { name = "tritonclient", marker = "extra == 'local'" }, { name = "typer", specifier = ">=0.12.0" }, { name = "universal-pathlib", specifier = ">=0.2.0" }, - { name = "urllib3", specifier = ">=2.7.0" }, + { name = "urllib3", specifier = "==2.7.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.30.0" }, { name = "vllm", marker = "sys_platform == 'linux' and extra == 'local'", specifier = "==0.20.0" }, ] @@ -2661,8 +2660,8 @@ provides-extras = ["service", "local", "multimedia", "nemotron-parse", "tabular" [[package]] name = "nemotron-graphic-elements-v1" -version = "1.0.0.dev20260508042302" -source = { registry = "https://test.pypi.org/simple/" } +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, { name = "matplotlib" }, @@ -2672,15 +2671,14 @@ dependencies = [ { name = "torch", version = "2.11.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] -sdist = { url = "https://test-files.pythonhosted.org/packages/f1/42/f4629e2cbaa9c8d7551258db494a06ede8b5e68bf9bd042b1bbc58721c38/nemotron_graphic_elements_v1-1.0.0.dev20260508042302.tar.gz", hash = "sha256:d7ca0dc49e75e332666666b90a756398deb1ab58f2e47f43418943f328095a5d", size = 40139, upload-time = "2026-05-08T04:23:39.699Z" } wheels = [ - { url = "https://test-files.pythonhosted.org/packages/bd/01/b036c64f7839e33fdc6e5f2573059ce9c2cc8634eb600487be636edeaf4f/nemotron_graphic_elements_v1-1.0.0.dev20260508042302-py3-none-any.whl", hash = "sha256:dcb59bad918124b702eb9a952518a3a7c2f7791bfc6acff2ebda8fa77d58f577", size = 34239, upload-time = "2026-05-08T04:23:38.699Z" }, + { url = "https://files.pythonhosted.org/packages/79/13/6d9b9c06aa58fe9c558dabb6d50532dbcfe98eba32e2e8975da2f83d01b7/nemotron_graphic_elements_v1-1.0.0-py3-none-any.whl", hash = "sha256:806b37f4fd740786105cf160769dd5506ca5dce8b4c65847b656e0ffd9cff5d6", size = 28738, upload-time = "2025-12-19T16:26:48.803Z" }, ] [[package]] name = "nemotron-ocr" -version = "2.0.0.dev20260512170901" -source = { registry = "https://test.pypi.org/simple/" } +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -2689,16 +2687,16 @@ dependencies = [ { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "torchvision", version = "0.26.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://test-files.pythonhosted.org/packages/42/21/6bda80c5b7d384a28f91582ba8faa91fe49ca591fa70eef69a1e85d1128e/nemotron_ocr-2.0.0.dev20260512170901.tar.gz", hash = "sha256:c88dc81a965cecdadf9f43248ccefed2db5807f5fdd9491b1f498111d483efe2", size = 155960, upload-time = "2026-05-12T17:19:39.383Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/ef/9dbba22f5de348a5f9c3af0488bf61258872926c40b7d513d71ef465b418/nemotron_ocr-2.0.0.tar.gz", hash = "sha256:84eb64f8af2ae12fbd83e38e482348ecce6a932b30946c873f8b8a95afae7355", size = 155817, upload-time = "2026-05-21T00:06:36.975Z" } wheels = [ - { url = "https://test-files.pythonhosted.org/packages/2e/75/c84b534b015386cec5046a36978689deb1b520a8d10a02154ad96180e8ae/nemotron_ocr-2.0.0.dev20260512170901-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:947778762a4d6f624c8f0d0262b9de079ba6cba0b2e9977ae25e3960768219ae", size = 36095211, upload-time = "2026-05-12T17:19:37.186Z" }, - { url = "https://test-files.pythonhosted.org/packages/27/c2/de710d55ac881e30ed719e08d32b25730cba546679aa2987bf3093d510e2/nemotron_ocr-2.0.0.dev20260512170901-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:11602defaee6eb01fa4f41403fb93ad10dfd966ea12c32f839a44160ddb4cdc5", size = 36806573, upload-time = "2026-05-12T17:20:45.594Z" }, + { url = "https://files.pythonhosted.org/packages/69/03/1d487d3bef63df377bd5f81311963ce24c4182984d47387bd8bf70f8ed20/nemotron_ocr-2.0.0-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:8bd3afc1dbfaae67cf20ec06b95d48056db8372e66fb46212cc302775734cb54", size = 36094927, upload-time = "2026-05-21T00:07:05.772Z" }, + { url = "https://files.pythonhosted.org/packages/1d/89/547df1d8c4a7fd97b49fc662078707d1f8b5740ce29fbb94db4cc6a3abd1/nemotron_ocr-2.0.0-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:fd5cf31259e236dd213edd36a4cdace2d4afc1972a5fad26e457804b1752d7de", size = 36806333, upload-time = "2026-05-21T00:06:34.545Z" }, ] [[package]] name = "nemotron-page-elements-v3" -version = "3.0.1.dev20260508042302" -source = { registry = "https://test.pypi.org/simple/" } +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, { name = "loguru" }, @@ -2717,15 +2715,14 @@ dependencies = [ { name = "torchvision", version = "0.26.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, { name = "tqdm" }, ] -sdist = { url = "https://test-files.pythonhosted.org/packages/e2/0f/028f9900eb9f334860b0501fbaf5d8a450e61810af25e891ed243e0ca2ee/nemotron_page_elements_v3-3.0.1.dev20260508042302.tar.gz", hash = "sha256:09ede95108868aeda1cee4a5843505a4316c286440ac0492c5108f1fbfb24a07", size = 44759, upload-time = "2026-05-08T04:23:38.327Z" } wheels = [ - { url = "https://test-files.pythonhosted.org/packages/1f/25/e8748e66c3aa3fda317534e94419a862511d6c3a576c8fa67c2cd65147b0/nemotron_page_elements_v3-3.0.1.dev20260508042302-py3-none-any.whl", hash = "sha256:5ec2357880cdc13d63ec5a8bea409cc6e6f3b6a5c1d36a677ce069623786c2cf", size = 40035, upload-time = "2026-05-08T04:23:37.319Z" }, + { url = "https://files.pythonhosted.org/packages/62/e1/25e7c782b97113fc4a6bcedc8ec98899d9ee8e72f4320f524c93fd29747c/nemotron_page_elements_v3-3.0.1-py3-none-any.whl", hash = "sha256:d29c47e19594ae2c546634bfa5ceaeb17262752c3a0510137d6dec501cf29d99", size = 32761, upload-time = "2025-12-19T17:03:59.787Z" }, ] [[package]] name = "nemotron-table-structure-v1" -version = "1.0.0.dev20260508042302" -source = { registry = "https://test.pypi.org/simple/" } +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, { name = "matplotlib" }, @@ -2735,9 +2732,8 @@ dependencies = [ { name = "torch", version = "2.11.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" }, { name = "torch", version = "2.11.0+cu130", source = { registry = "https://download.pytorch.org/whl/cu130" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }, ] -sdist = { url = "https://test-files.pythonhosted.org/packages/93/8b/f1712dd0de02e28cd5197cd9a38e382d9b3d32cf4c9928bf6e913d1268a3/nemotron_table_structure_v1-1.0.0.dev20260508042302.tar.gz", hash = "sha256:7d5c0dfb21d877fe0731f340388d9e028662fba451b396f3fc2183bb59b57eca", size = 48641, upload-time = "2026-05-08T04:23:49.309Z" } wheels = [ - { url = "https://test-files.pythonhosted.org/packages/38/48/4522b45617e90676da194acc62806072ac94e96eb6ec689a5b07de877326/nemotron_table_structure_v1-1.0.0.dev20260508042302-py3-none-any.whl", hash = "sha256:e8884a49d169fc576f288b7ac7255e7594d8a198e944c038e073e2b7ce4df4ea", size = 39013, upload-time = "2026-05-08T04:23:48.039Z" }, + { url = "https://files.pythonhosted.org/packages/58/be/17551a3321df07138f8637e1481360e5f85407e3061af89a988da9f02f25/nemotron_table_structure_v1-1.0.0-py3-none-any.whl", hash = "sha256:e65b9fc66da9e7df30ef823ace23df36b377f27131c266a8adec005a775af3e3", size = 31832, upload-time = "2025-12-19T16:36:23.667Z" }, ] [[package]]