From b6b9e1ed11bd8c9b61f7318ffbc88ae37488b23a Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Wed, 6 May 2026 11:03:00 -0400 Subject: [PATCH 1/3] Adjust location that uv creates venv --- Dockerfile | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index 54a0fc22eb..b26aeb1c95 100644 --- a/Dockerfile +++ b/Dockerfile @@ -63,10 +63,12 @@ RUN sed -i 's/# deb-src/deb-src/' /etc/apt/sources.list \ done \ && apt-get clean +ENV UV_INSTALL_DIR=/opt/uv RUN curl -LsSf https://astral.sh/uv/install.sh | sh -ENV PATH=/root/.local/bin:$PATH +ENV PATH=/opt/uv/bin:$PATH ENV UV_LINK_MODE=copy +ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python RUN --mount=type=cache,target=/root/.cache/uv \ uv python install 3.12 \ @@ -90,9 +92,6 @@ WORKDIR /workspace COPY data data COPY nemo_retriever nemo_retriever -# ENV VIRTUAL_ENV=/opt/retriever_runtime -# ENV PATH=/opt/retriever_runtime/bin:/root/.local/bin:$PATH -# ENV LD_LIBRARY_PATH=/opt/retriever_runtime/lib:${LD_LIBRARY_PATH} # --------------------------------------------------------------------------- # Install nemo_retriever and path deps (build context = repo root) @@ -109,7 +108,7 @@ ENV PYTHONUNBUFFERED=1 # Activate venv by default so CLI and python see nemo_retriever; mount over /workspace for dev. ENV VIRTUAL_ENV=/opt/retriever_runtime -ENV PATH=/opt/retriever_runtime/bin:/root/.local/bin:$PATH +ENV PATH=/opt/retriever_runtime/bin:/opt/uv/bin:$PATH # Editable install: at runtime, -v host_repo:/workspace overrides these dirs so dev changes apply. 
SHELL ["/bin/bash", "-c"] @@ -143,10 +142,7 @@ FROM install AS service ENV NEMO_RETRIEVER_SERVICE_CONFIG=/etc/nemo-retriever/retriever-service.yaml -ENV PATH=/opt/retriever_runtime/bin:$PATH - -RUN chmod -R a+rX /root/.local \ - && groupadd -r nemo && useradd -r -g nemo -d /workspace -s /sbin/nologin nemo \ +RUN groupadd -r nemo && useradd -r -g nemo -d /workspace -s /sbin/nologin nemo \ && mkdir -p /etc/nemo-retriever /var/lib/nemo-retriever \ && cp /workspace/nemo_retriever/src/nemo_retriever/service/retriever-service.yaml \ "${NEMO_RETRIEVER_SERVICE_CONFIG}" \ From 34128279692fdf6e93581352fd5504a1390681dc Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 19 May 2026 22:07:01 +0000 Subject: [PATCH 2/3] Fix PyPI publish wheel path and add artifact listing step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit upload-artifact flattens nemo_retriever/dist into ./dist on download; use find instead of ./dist/nemo_retriever/dist/*. Re-run failed jobs does not pick up workflow changes — dispatch a new run after merging. 
--- .github/workflows/reusable-pypi-publish.yml | 24 ++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable-pypi-publish.yml b/.github/workflows/reusable-pypi-publish.yml index 618042345c..e5b0000b9b 100644 --- a/.github/workflows/reusable-pypi-publish.yml +++ b/.github/workflows/reusable-pypi-publish.yml @@ -21,6 +21,12 @@ jobs: name: python-wheels path: ./dist + - name: List downloaded wheel artifacts + run: | + echo "Contents of ./dist after download-artifact:" + find ./dist -type f -ls 2>/dev/null || true + find ./dist -type d 2>/dev/null || true + - name: Setup Python uses: actions/setup-python@v6 with: @@ -35,9 +41,17 @@ jobs: ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_USERNAME }} ARTIFACTORY_PASSWORD: ${{ secrets.ARTIFACTORY_PASSWORD }} run: | - # Publish all wheels + # upload-artifact strips to the common parent (nemo_retriever/dist/), so + # downloaded files land directly under ./dist/, not ./dist/nemo_retriever/dist/. + mapfile -t DIST_FILES < <(find ./dist -type f \( -name '*.whl' -o -name '*.tar.gz' \)) + if [ "${#DIST_FILES[@]}" -eq 0 ]; then + echo "::error::No wheel or sdist files under ./dist" + find ./dist -type f || true + exit 1 + fi + printf 'Publishing:\n%s\n' "${DIST_FILES[@]}" twine upload --verbose \ - --repository-url $ARTIFACTORY_URL \ - -u $ARTIFACTORY_USERNAME \ - -p $ARTIFACTORY_PASSWORD \ - ./dist/nemo_retriever/dist/* + --repository-url "$ARTIFACTORY_URL" \ + -u "$ARTIFACTORY_USERNAME" \ + -p "$ARTIFACTORY_PASSWORD" \ + "${DIST_FILES[@]}" From 3a8a9bbb989bb297f0d12e89f761d20ef283b014 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Sun, 31 May 2026 19:14:19 -0400 Subject: [PATCH 3/3] feat: add /v1/test endpoint to the retriever service - New router module routers/test.py with a dep-free health-check route - Returns status, mode, and Python runtime info - Wired into app.py under /v1 prefix (no external dependencies) --- .../src/nemo_retriever/service/app.py | 7 +-- 
.../nemo_retriever/service/routers/test.py | 43 +++++++++++++++++++ 2 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 nemo_retriever/src/nemo_retriever/service/routers/test.py diff --git a/nemo_retriever/src/nemo_retriever/service/app.py b/nemo_retriever/src/nemo_retriever/service/app.py index f7cef45b14..290dfb840f 100644 --- a/nemo_retriever/src/nemo_retriever/service/app.py +++ b/nemo_retriever/src/nemo_retriever/service/app.py @@ -271,14 +271,15 @@ def create_app(config: ServiceConfig) -> FastAPI: else: logger.info("Bearer-token authentication DISABLED (no api_token configured)") - from nemo_retriever.service.routers import admin, ingest, metrics + from nemo_retriever.service.routers import admin, ingest, metrics, test from nemo_retriever.service.services.prometheus import instrument_app app.include_router(ingest.router, prefix="/v1") app.include_router(metrics.router, prefix="/v1") - # Admin/internal endpoints — pool_stats etc. Registered on every - # role; the handler self-reports an empty pool dict on gateway pods. + # Admin/internal endpoints — pool_stats etc. app.include_router(admin.router, prefix="/v1") + # General-purpose smoke-test route — no external deps. + app.include_router(test.router, prefix="/v1") instrument_app(app, role=config.mode) if config.mode == "gateway": diff --git a/nemo_retriever/src/nemo_retriever/service/routers/test.py b/nemo_retriever/src/nemo_retriever/service/routers/test.py new file mode 100644 index 0000000000..560ad326d1 --- /dev/null +++ b/nemo_retriever/src/nemo_retriever/service/routers/test.py @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Test endpoint for the retriever service. + +Provides a lightweight, no-dependency health-check route that validates +the Python runtime and the current service mode are reachable at all. 
+"""
+
+from __future__ import annotations
+
+import logging
+import platform
+import sys
+
+from fastapi import APIRouter, Request
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["test"], include_in_schema=True)
+
+
+@router.get("/test", summary="Health-check that validates the Python runtime")
+async def test(request: Request) -> dict:
+    """Return a JSON blob describing the current process environment.
+
+    Response shape::
+
+        {
+            "status": "ok",
+            "mode": "gateway" | "realtime" | "batch" | "standalone" | "unknown",
+            "python": "3.12.1; Linux",
+        }
+
+    ``mode`` is read from ``request.app.state.config`` and falls back to
+    ``"unknown"`` when no config is attached; ``python`` is the interpreter
+    version followed by ``platform.system()``, separated by ``"; "``.
+    """
+    config = getattr(request.app.state, "config", None)  # may be unset on app.state
+    mode = config.mode if config is not None else "unknown"
+    runtime = f"{sys.version.split()[0]}; {'/'.join(platform.system().split())}"
+    return {"status": "ok", "mode": mode, "python": runtime}