Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deepstream-vision/detector/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Extraction script (not needed in container)
extract_engine.sh

# Note: Pre-built engine files (*.engine) are now included in the image for fast startup
# Note: TensorRT engine files (*.engine) are device-specific and auto-generated at first startup
6 changes: 2 additions & 4 deletions deepstream-vision/detector/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,8 @@ COPY --from=builder /compiled/yolo11n.onnx /app/
# Copy labels from the export (auto-generated by export script)
COPY --from=builder /compiled/labels.txt /app/labels_exported.txt

# Pre-built TensorRT engine for fast startup (skip 8+ minute build time)
# Engine was built using NvDsInferYoloCudaEngineGet from the custom YOLO library
# on Jetson Orin with DeepStream 7.1 + CUDA 12.6 + TensorRT 10.x
COPY model_b2_gpu0_fp16.engine /app/
# TensorRT engine is auto-generated by nvinfer on first startup from yolo11n.onnx
# using the NvDsInferYoloCudaEngineGet custom engine builder (~5-10 min on first run)

# Copy application files
COPY detector.py /app/
Expand Down
7 changes: 3 additions & 4 deletions deepstream-vision/vlm/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Qwen3-VL Vision-Language Model Service
# Optimized for Jetson Orin Nano with INT4 quantization using Jetson-compatible bitsandbytes
# Model is baked into the image for fast startup (no downloads needed)
# Model is downloaded on first startup via HuggingFace Hub (~4GB)

FROM ubuntu:22.04

Expand All @@ -17,9 +17,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libgomp1 \
&& rm -rf /var/lib/apt/lists/*

# Copy pre-downloaded Qwen3-VL-2B model EARLY for better layer caching
# Download once with: huggingface-cli download Qwen/Qwen3-VL-2B-Instruct --local-dir ./models/Qwen3-VL-2B-Instruct
COPY models/Qwen3-VL-2B-Instruct /app/models/Qwen3-VL-2B-Instruct
# Model is downloaded at runtime on first startup via HuggingFace Hub
# (Qwen/Qwen3-VL-2B-Instruct, ~4GB, public - no token needed)

# Upgrade pip
RUN pip3 install --no-cache-dir --upgrade pip
Expand Down
6 changes: 3 additions & 3 deletions deepstream-vision/vlm/qwen3_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def add_cors_headers(response):
model_loaded = False

# Configuration
MODEL_PATH = "/app/models/Qwen3-VL-2B-Instruct"
MODEL_PATH = "Qwen/Qwen3-VL-2B-Instruct"
MODEL_NAME = "Qwen3-VL-2B-Instruct"
MAX_IMAGE_SIZE = 672 # Resize large images for faster processing

Expand Down Expand Up @@ -100,7 +100,7 @@ def load_model():
processor = AutoProcessor.from_pretrained(
MODEL_PATH,
trust_remote_code=True,
local_files_only=True
local_files_only=False
)
logger.info("Processor loaded successfully")

Expand Down Expand Up @@ -130,7 +130,7 @@ def load_model():
"trust_remote_code": True,
"quantization_config": quantization_config,
"device_map": 'cuda',
"local_files_only": True,
"local_files_only": False,
}
if attn_impl:
model_kwargs["attn_implementation"] = attn_impl
Expand Down
3 changes: 1 addition & 2 deletions python/pipecat-assistant/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ RUN uv pip install --system -r requirements.txt
COPY server/app.py ./
COPY server/sounds ./sounds

# Copy .env file (contains API keys)
COPY server/.env ./
# API keys (DEEPGRAM_API_KEY, XAI_API_KEY) are provided as runtime environment variables

# Copy the built frontend
COPY --from=frontend-builder /app/frontend/dist ./frontend/dist
Expand Down