Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,13 @@

.dockerignore
.git/
Dockerfile
Dockerfile

# Build artifacts
target/

# Python virtual environments
.venv/
venv/
__pycache__/
*.pyc
56 changes: 56 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,62 @@ jobs:
files: lcov.info
fail_ci_if_error: false

sidecar-test:
name: Sidecar Python Tests
runs-on: ubuntu-latest
Comment thread
nicolasnoble marked this conversation as resolved.

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'

- name: Install sidecar dependencies
run: |
python -m pip install --upgrade pip
pip install -e "sidecar[dev]"

- name: Run sidecar unit tests (no MinIO)
run: |
cd sidecar
pytest tests/test_api.py -v

- name: Start MinIO for integration tests
  run: |
    # Launch a throwaway MinIO server; the S3 API is published on :9000 and
    # the web console on :9001.
    docker run -d --name minio \
      -p 9000:9000 -p 9001:9001 \
      -e MINIO_ROOT_USER=minioadmin \
      -e MINIO_ROOT_PASSWORD=minioadmin \
      minio/minio:latest server /data --console-address ":9001"
    # Wait for MinIO to be ready: poll the liveness endpoint instead of
    # sleeping a fixed time, and fail the step if it never comes up.
    if ! timeout 30 bash -c 'until curl -fsS http://localhost:9000/minio/health/live > /dev/null 2>&1; do sleep 1; done'; then
      echo "MinIO did not become ready within 30 seconds" >&2
      docker logs minio || true
      exit 1
    fi
    # Create test bucket and upload test data
    docker run --rm --network host --entrypoint /bin/sh minio/mc -c "
      mc alias set myminio http://localhost:9000 minioadmin minioadmin &&
      mc mb myminio/models --ignore-existing &&
      echo '{\"model_type\": \"test\"}' > /tmp/config.json &&
      mc cp /tmp/config.json myminio/models/test-model/config.json
    "

- name: Run sidecar integration tests with MinIO
env:
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
AWS_ENDPOINT_URL: http://localhost:9000
AWS_REGION: us-east-1
run: |
cd sidecar
pytest tests/ -v

- name: Stop MinIO
if: always()
run: |
docker stop minio || true
docker rm minio || true

docker-build:
name: Docker Build
runs-on: ubuntu-latest
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,10 @@ logs/
models.db
**/__pycache__/
docs/FEEDBACK.md
docs/optimization-comparison.md

# Python
.venv/
Comment thread
nicolasnoble marked this conversation as resolved.
__pycache__/
*.pyc
*.pyo
*.egg-info/
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@ tonic-build = "0.13"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
futures = "0.3"
reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false }
uuid = { version = "1.17", features = ["v4", "serde"] }
thiserror = "2.0"
redis = { version = "0.27", features = ["tokio-comp", "connection-manager"] }
urlencoding = "2.1"

# Kubernetes client (kube-rs) for CRD-based metadata backend
kube = { version = "0.98", features = ["client", "runtime", "derive", "rustls-tls"], default-features = false }
Expand Down
30 changes: 25 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,23 @@ FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS runtime

WORKDIR /app

# Install runtime dependencies
# Install runtime dependencies including Python for the sidecar
RUN apt-get update && \
apt-get install -y --no-install-recommends ca-certificates libssl-dev gpgv && \
apt-get install -y --no-install-recommends \
ca-certificates \
gpgv \
libssl-dev \
python3 \
python3-pip \
python3-venv && \
rm -rf /var/lib/apt/lists/*

# Create Python virtual environment and install sidecar package
COPY sidecar/pyproject.toml /app/sidecar/
COPY sidecar/src /app/sidecar/src
RUN python3 -m venv /app/venv && \
/app/venv/bin/pip install --no-cache-dir /app/sidecar
Comment thread
nicolasnoble marked this conversation as resolved.

# Copy all built binaries
COPY --from=builder /app/target/release/modelexpress-server .
COPY --from=builder /app/target/release/modelexpress-cli .
Expand All @@ -37,15 +49,23 @@ COPY --from=builder /app/target/release/fallback_test .
# Copy the Attribution files
COPY ATTRIBUTIONS_Rust.md .

# Expose the default port
# Copy the entrypoint script
COPY entrypoint.sh .
RUN chmod +x /app/entrypoint.sh

# Expose the default ports (gRPC server and sidecar)
EXPOSE 8001
EXPOSE 8002

# Set default environment variables (can be overridden)
ENV MODEL_EXPRESS_SERVER_PORT=8001
ENV MODEL_EXPRESS_SIDECAR_PORT=8002
ENV MODEL_EXPRESS_SIDECAR_ENDPOINT=http://127.0.0.1:8002
ENV MODEL_EXPRESS_LOG_LEVEL=info
ENV MODEL_EXPRESS_DATABASE_PATH=/app/models.db
ENV MODEL_EXPRESS_CACHE_DIRECTORY=/app/cache
ENV HF_HUB_CACHE=/app/cache
ENV PYTHONPATH=/app/sidecar/src

# Run the server by default
CMD ["./modelexpress-server"]
# Run both services via entrypoint
ENTRYPOINT ["/app/entrypoint.sh"]
51 changes: 51 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,58 @@ services:
dockerfile: Dockerfile
ports:
- "8001:8001"
- "8002:8002"
environment:
- MODEL_EXPRESS_SERVER_PORT=8001
- MODEL_EXPRESS_SIDECAR_PORT=8002
- MODEL_EXPRESS_SIDECAR_ENDPOINT=http://127.0.0.1:8002
- MODEL_EXPRESS_LOG_LEVEL=info
- MODEL_EXPRESS_CACHE_DIRECTORY=/app/cache
# S3 credentials for MinIO (optional, can be passed at runtime)
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-minioadmin}
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-minioadmin}
- AWS_ENDPOINT_URL=http://minio:9000
- AWS_REGION=us-east-1
volumes:
- model-cache:/app/cache
depends_on:
- minio
restart: unless-stopped

# MinIO for S3-compatible storage testing.
# Publishes the S3 API on 9000 and the web console on 9001; root credentials
# default to minioadmin/minioadmin and can be overridden from the host
# environment.
minio:
  image: minio/minio:latest
  ports:
    - "9000:9000"
    - "9001:9001"
  environment:
    - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
    - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
  command: server /data --console-address ":9001"
  volumes:
    - minio-data:/data
  healthcheck:
    # Recent minio/minio images do not ship curl, so a curl-based probe always
    # fails; use the bundled MinIO client's readiness check instead.
    test: ["CMD", "mc", "ready", "local"]
    interval: 30s
    timeout: 20s
    retries: 3
  restart: unless-stopped

# MinIO client for bucket initialization (optional).
# One-shot helper: waits for MinIO to accept connections, creates the `models`
# bucket and makes it anonymously downloadable. It is best-effort and always
# exits 0.
minio-init:
  image: minio/mc:latest
  depends_on:
    - minio
  environment:
    # Same defaults/overrides as the minio service so the credentials cannot
    # drift apart when MINIO_ROOT_USER / MINIO_ROOT_PASSWORD are customised.
    - MINIO_ROOT_USER=${MINIO_ROOT_USER:-minioadmin}
    - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-minioadmin}
  entrypoint:
    - /bin/sh
    - '-c'
    # `$$` escapes Compose interpolation so the shell sees a literal `$`.
    - |
      attempts=0
      # Retry until the server answers instead of sleeping a fixed time.
      until mc alias set myminio http://minio:9000 "$$MINIO_ROOT_USER" "$$MINIO_ROOT_PASSWORD"; do
        attempts=$$((attempts + 1))
        if [ "$$attempts" -ge 30 ]; then
          echo 'Warning: MinIO not reachable, skipping bucket initialization' >&2
          exit 0
        fi
        sleep 1
      done
      mc mb myminio/models --ignore-existing
      mc anonymous set download myminio/models
      echo 'MinIO initialization complete'
      exit 0
Comment thread
nicolasnoble marked this conversation as resolved.

volumes:
model-cache:
minio-data:
56 changes: 56 additions & 0 deletions entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Entrypoint script that runs both the Model Streamer sidecar and the ModelExpress server.
#
# Flow:
#   1. Start the Python sidecar (uvicorn) in the background on 127.0.0.1.
#   2. Poll its /health endpoint for up to MAX_RETRIES attempts.
#   3. Start /app/modelexpress-server with the script arguments and wait for it.
#
# Environment:
#   MODEL_EXPRESS_SIDECAR_PORT  port the sidecar listens on (default: 8002)
#   MODEL_EXPRESS_LOG_LEVEL     value passed to uvicorn --log-level (default: info)
#
# Exit status: the server's exit status, or 130/143 when stopped by INT/TERM.

set -e

# Configuration
SIDECAR_PORT="${MODEL_EXPRESS_SIDECAR_PORT:-8002}"
LOG_LEVEL="${MODEL_EXPRESS_LOG_LEVEL:-info}"
MAX_RETRIES=30

SIDECAR_PID=""
SERVER_PID=""

# Stop both children and wait for them so they can shut down gracefully.
cleanup() {
    # Reset the traps so cleanup runs only once.
    trap - EXIT INT TERM
    echo "Shutting down..."
    if [ -n "$SERVER_PID" ]; then
        kill "$SERVER_PID" 2>/dev/null || true
    fi
    if [ -n "$SIDECAR_PID" ]; then
        kill "$SIDECAR_PID" 2>/dev/null || true
    fi
    # Reap the children before the shell exits.
    wait 2>/dev/null || true
}

# Install the traps before anything is started so a signal received during
# sidecar startup is handled too. INT/TERM exit with the conventional
# 128+signal status, which in turn fires the EXIT trap.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "Starting ModelExpress with Model Streamer sidecar..."

# Start the Python sidecar in the background
echo "Starting Model Streamer sidecar on port ${SIDECAR_PORT}..."
/app/venv/bin/python -m uvicorn \
    modelexpress_sidecar.main:app \
    --host 127.0.0.1 \
    --port "${SIDECAR_PORT}" \
    --log-level "${LOG_LEVEL}" &

SIDECAR_PID=$!

# Wait for sidecar to be ready
echo "Waiting for sidecar to be ready..."
RETRY_COUNT=0
SIDECAR_READY=false
while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
    # Stop polling early if the sidecar process has already died.
    if ! kill -0 "$SIDECAR_PID" 2>/dev/null; then
        echo "Warning: Sidecar process exited during startup"
        break
    fi
    # urlopen raises on 4xx/5xx responses, so only a successful /health
    # response counts as ready.
    if /app/venv/bin/python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:${SIDECAR_PORT}/health', timeout=2)" > /dev/null 2>&1; then
        echo "Sidecar is ready!"
        SIDECAR_READY=true
        break
    fi
    RETRY_COUNT=$((RETRY_COUNT + 1))
    sleep 1
done

# The sidecar is treated as best-effort: the server is started regardless.
if [ "$SIDECAR_READY" != true ]; then
    echo "Warning: Sidecar health check timed out, proceeding anyway..."
fi

# Start the main server. Run it in the background and wait so the shell stays
# alive to handle signals and the cleanup trap can fire to stop both children.
echo "Starting ModelExpress server..."
/app/modelexpress-server "$@" &
SERVER_PID=$!
wait "$SERVER_PID"
2 changes: 2 additions & 0 deletions modelexpress_common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ chrono = { workspace = true }
clap = { workspace = true }
config = { workspace = true }
jiff = { workspace = true }
reqwest = { workspace = true }
urlencoding = { workspace = true }

[dev-dependencies]
mockall = "0.13"
Expand Down
1 change: 1 addition & 0 deletions modelexpress_common/proto/model.proto
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ enum ModelStatus {
// Enum representing model providers
enum ModelProvider {
HUGGING_FACE = 0;
MODEL_STREAMER = 1;
}

// Request for streaming model files
Expand Down
Loading