Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
needs: build-base
# always() lets this job run even when its dependency (build-base) was skipped; the result check still blocks it when build-base failed
if: always() && (needs.build-base.result == 'success' || needs.build-base.result == 'skipped')
runs-on: blacksmith-8vcpu-ubuntu-2204
runs-on: blacksmith-16vcpu-ubuntu-2404
steps:
- name: Checkout
uses: actions/checkout@v4
Expand All @@ -64,15 +64,15 @@ jobs:
- 'official-templates/pytorch/**'

- name: Setup Docker
if: steps.changes.outputs.pytorch_any_changed == 'true'
if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.pytorch_any_changed == 'true'
uses: ./.github/actions/docker-setup
id: setup
with:
dockerhub-username: ${{ secrets.DOCKERHUB_USERNAME }}
dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build pytorch images
if: steps.changes.outputs.pytorch_any_changed == 'true'
if: github.event_name == 'workflow_dispatch' || steps.changes.outputs.pytorch_any_changed == 'true'
uses: docker/bake-action@v6
env:
BUILDX_BAKE_ENTITLEMENTS_FS: 0
Expand Down
41 changes: 41 additions & 0 deletions .github/workflows/nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Builds and pushes the NVIDIA images described by
# official-templates/nvidia-pytorch/docker-bake.hcl.
name: Nvidia Image Build

# Triggered by pushes that touch this workflow or any nvidia-* template
# directory, and by manual dispatch.
on:
  push:
    paths:
      - '.github/workflows/nvidia.yml'
      - 'official-templates/nvidia-*/**'
  workflow_dispatch:

permissions:
  contents: read
  packages: write

jobs:
  build-nvidia:
    runs-on: blacksmith-16vcpu-ubuntu-2404
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # 0 fetches the full history instead of a shallow clone.
          fetch-depth: 0

      # Local composite action; its `release-suffix` output is consumed by the
      # build step below.
      - name: Setup Docker
        id: setup
        uses: ./.github/actions/docker-setup
        with:
          dockerhub-username: ${{ secrets.DOCKERHUB_USERNAME }}
          dockerhub-token: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build nvidia images
        uses: docker/bake-action@v6
        env:
          # Turns off Bake's filesystem entitlement checks (see the Buildx
          # environment-variable reference).
          BUILDX_BAKE_ENTITLEMENTS_FS: 0
          # Interpolated into the image tag by the bake file.
          RELEASE_SUFFIX: ${{ steps.setup.outputs.release-suffix }}
        with:
          source: .
          # Shared version variables first, then the nvidia-pytorch targets.
          files: |
            official-templates/shared/versions.hcl
            official-templates/nvidia-pytorch/docker-bake.hcl
          push: true

2 changes: 2 additions & 0 deletions container-template/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,6 @@ export_env_vars

echo "Start script(s) finished, Pod is ready to use."

execute_script "/post_start.sh" "Running post-start script..."

sleep infinity
88 changes: 46 additions & 42 deletions official-templates/base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
ARG BASE_IMAGE=non-existing
FROM ${BASE_IMAGE}

ARG RP_SKIP_PYTHON
ARG RP_SKIP_JUPYTER

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

ENV SHELL=/bin/bash
Expand Down Expand Up @@ -44,53 +47,54 @@ RUN apt-get update --yes && \
libsm6 libssl-dev libswscale-dev libtiff-dev libv4l-dev libx264-dev libxrender-dev \
libxvidcore-dev lsof make mtr nano nfs-common nginx openssh-server rsync slurm-wlm \
software-properties-common sudo tmux unzip vim wget zip zstd

# Add the Python PPA
RUN add-apt-repository ppa:deadsnakes/ppa -y

# Install Python
RUN apt-get install --yes --no-install-recommends \
python3.9-dev python3.9-venv python3.9-distutils \
python3.10-dev python3.10-venv python3.10-distutils \
python3.11-dev python3.11-venv python3.11-distutils \
python3.12-dev python3.12-venv \
python3.13-dev python3.13-venv && \

# Install Python versions
RUN [[ -n $RP_SKIP_PYTHON ]] && exit 0; \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt-get install --yes --no-install-recommends \
python3.9-dev python3.9-venv python3.9-distutils \
python3.10-dev python3.10-venv python3.10-distutils \
python3.11-dev python3.11-venv python3.11-distutils \
python3.12-dev python3.12-venv \
python3.13-dev python3.13-venv && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Install pip if we are not ROCm
RUN if [ -z "${ROCM_PATH}" ]; then \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3.9 get-pip.py && \
python3.10 get-pip.py && \
python3.11 get-pip.py && \
python3.12 get-pip.py && \
python3.13 get-pip.py && \
rm get-pip.py; \
fi

# ensurepip (it's there, we just installed it^) and install virtualenv
RUN if [ -z "${ROCM_PATH}" ]; then \
python3.9 -m pip install --upgrade pip virtualenv && \
python3.10 -m pip install --upgrade pip virtualenv && \
python3.11 -m pip install --upgrade pip virtualenv && \
python3.12 -m pip install --upgrade pip virtualenv && \
python3.13 -m pip install --upgrade pip virtualenv; \
fi

RUN ln -sf /usr/bin/python3.12 /usr/local/bin/python
RUN ln -sf /usr/local/bin/pip3.12 /usr/local/bin/pip
RUN ln -sf /usr/local/bin/pip3.12 /usr/local/bin/pip3
rm -rf /var/lib/apt/lists/*;

# Install pip
RUN [[ -n $RP_SKIP_PYTHON ]] && exit 0; \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python3.9 get-pip.py && \
python3.10 get-pip.py && \
python3.11 get-pip.py && \
python3.12 get-pip.py && \
python3.13 get-pip.py && \
rm get-pip.py;

# Install virtualenv
RUN [[ -n $RP_SKIP_PYTHON ]] && exit 0; \
python3.9 -m pip install --upgrade pip virtualenv && \
python3.10 -m pip install --upgrade pip virtualenv && \
python3.11 -m pip install --upgrade pip virtualenv && \
python3.12 -m pip install --upgrade pip virtualenv && \
python3.13 -m pip install --upgrade pip virtualenv;

# Symlink default python/pip
RUN [[ -n $RP_SKIP_PYTHON ]] && exit 0; \
ln -sf /usr/bin/python3.12 /usr/local/bin/python && \
ln -sf /usr/local/bin/pip3.12 /usr/local/bin/pip && \
ln -sf /usr/local/bin/pip3.12 /usr/local/bin/pip3;

COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Install Jupyter and related packages
RUN python -m pip install --upgrade --no-cache-dir \
jupyterlab \
ipywidgets \
jupyter-archive \
notebook==7.4.2
# Install Jupyter and hf_transfer packages
RUN [[ -n $RP_SKIP_JUPYTER ]] && exit 0; \
python -m pip install --upgrade --no-cache-dir \
hf_transfer \
jupyterlab \
ipywidgets \
jupyter-archive \
notebook==7.4.2;

# Install filebrowser
RUN curl -LsSf https://raw.githubusercontent.com/filebrowser/get/master/get.sh | bash
Expand Down
16 changes: 16 additions & 0 deletions official-templates/nvidia-pytorch/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# NVIDIA PyTorch Base Image

NVIDIA's PyTorch NGC Container (`nvcr.io/nvidia/pytorch`) built for easy deployment on Runpod.

For more information on the NGC images, visit <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch>.

## Deployment

Use the image `runpod/nvidia-pytorch:1.0.3-25.11` (template release `1.0.3`, built on the NGC PyTorch `25.11` container).

## Build

```bash
./bake.sh nvidia-pytorch
```

28 changes: 28 additions & 0 deletions official-templates/nvidia-pytorch/docker-bake.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Default group: names the single target, "pytorch-2511", defined in this file.
group "default" {
  targets = ["pytorch-2511"]
}

# Shared settings for the NVIDIA-based images. It is not listed in the default
# group; concrete targets pull it in through `inherits`.
target "nvidia-base" {
  context    = "official-templates/base"
  dockerfile = "Dockerfile"
  platforms  = ["linux/amd64"]

  # Additional named build contexts passed to the build.
  contexts = {
    scripts = "container-template"
    proxy   = "container-template/proxy"
    logo    = "container-template"
  }

  # The base Dockerfile exits early from its Python and Jupyter install steps
  # when the corresponding RP_SKIP_* build arg is non-empty.
  args = {
    RP_SKIP_PYTHON  = "1"
    RP_SKIP_JUPYTER = "1"
  }
}

# NGC PyTorch 25.11 image: the nvidia-base settings with
# nvcr.io/nvidia/pytorch:25.11-py3 as the base image.
target "pytorch-2511" {
  inherits = ["nvidia-base"]

  args = {
    BASE_IMAGE = "nvcr.io/nvidia/pytorch:25.11-py3"
  }

  # Tag layout: <release version><CI suffix>-<NGC container version>.
  tags = ["runpod/nvidia-pytorch:${RELEASE_VERSION}${RELEASE_SUFFIX}-25.11"]
}
16 changes: 13 additions & 3 deletions official-templates/pytorch/docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

variable "TORCH_META" {
default = {
"2.9.1" = {}
"2.9.0" = {
torchvision = "0.24.0"
}
"2.8.0" = {
torchvision = "0.23.0"
}
Expand All @@ -21,14 +25,20 @@ variable "CUDA_TORCH_COMBINATIONS" {
{ cuda_version = "12.8.1", torch = "2.6.0", whl_src = "126" },
{ cuda_version = "12.8.1", torch = "2.7.1", whl_src = "128" },
{ cuda_version = "12.8.1", torch = "2.8.0", whl_src = "128" },
{ cuda_version = "12.8.1", torch = "2.9.0", whl_src = "128" },
{ cuda_version = "12.8.1", torch = "2.9.1", whl_src = "128" },

{ cuda_version = "12.9.0", torch = "2.6.0", whl_src = "126" },
{ cuda_version = "12.9.0", torch = "2.7.1", whl_src = "128" },
{ cuda_version = "12.9.0", torch = "2.8.0", whl_src = "129" },
{ cuda_version = "12.9.0", torch = "2.9.0", whl_src = "129" },
{ cuda_version = "12.9.0", torch = "2.9.1", whl_src = "129" },

{ cuda_version = "13.0.0", torch = "2.6.0", whl_src = "126" },
{ cuda_version = "13.0.0", torch = "2.7.1", whl_src = "128" },
{ cuda_version = "13.0.0", torch = "2.8.0", whl_src = "129" }
{ cuda_version = "13.0.0", torch = "2.8.0", whl_src = "129" },
{ cuda_version = "13.0.0", torch = "2.9.0", whl_src = "130" },
{ cuda_version = "13.0.0", torch = "2.9.1", whl_src = "130" },
]
}

Expand All @@ -44,7 +54,7 @@ variable "COMPATIBLE_BUILDS" {
wheel_src = combo.whl_src
torch = combo.torch
torch_code = replace(combo.torch, ".", "")
torch_vision = TORCH_META[combo.torch].torchvision
torch_vision = lookup(TORCH_META[combo.torch], "torchvision", "")
} if cuda.version == combo.cuda_version && contains(cuda.ubuntu, ubuntu.version)
]
]
Expand Down Expand Up @@ -80,7 +90,7 @@ target "pytorch-matrix" {
args = {
BASE_IMAGE = "runpod/base:${RELEASE_VERSION}${RELEASE_SUFFIX}-cuda${build.cuda_code}-${build.ubuntu_name}"
WHEEL_SRC = build.wheel_src
TORCH = "torch==${build.torch} torchvision==${build.torch_vision} torchaudio==${build.torch}"
TORCH = "torch==${build.torch}${build.torch_vision != "" ? " torchvision==${build.torch_vision}" : ""} torchaudio==${build.torch}"
}

tags = [
Expand Down
3 changes: 3 additions & 0 deletions official-templates/rocm/docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ target "rocm-base" {
proxy = "container-template/proxy"
logo = "container-template"
}
args = {
RP_SKIP_PYTHON = "1"
}
}

target "rocm644-ubuntu2204-pytorch251" {
Expand Down
2 changes: 1 addition & 1 deletion official-templates/shared/versions.hcl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
RELEASE_VERSION = "1.0.2"
RELEASE_VERSION = "1.0.3"

variable "RELEASE_SUFFIX" {
default = "" # Set by CI, not used by humans.
Expand Down
Loading