Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .tekton/instaslice-daemonset-next-pull-request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ spec:
description: Execute the build with network isolation
name: hermetic
type: string
- default: ""
- default: '{"type": "rpm", "path": "./konflux"}'
description: Build dependencies to be prefetched by Cachi2
name: prefetch-input
type: string
Expand Down
2 changes: 1 addition & 1 deletion .tekton/instaslice-daemonset-next-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ spec:
description: Execute the build with network isolation
name: hermetic
type: string
- default: ""
- default: '{"type": "rpm", "path": "./konflux"}'
description: Build dependencies to be prefetched by Cachi2
name: prefetch-input
type: string
Expand Down
35 changes: 34 additions & 1 deletion Dockerfile.daemonset.ocp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,40 @@ WORKDIR /workspace
COPY . .
RUN GO_BUILD_PACKAGES=./cmd/das-daemonset make

FROM nvcr.io/nvidia/cuda:13.1.1-base-ubi9
# Build stage to install libnvidia-ml package.
# Only the NVML shared library produced here is copied into the final image,
# so nothing else from this stage needs to ship.
FROM registry.redhat.io/ubi9/ubi@sha256:cecb1cde7bda7c8165ae27841c2335667f8a3665a349c0d051329c61660a496c AS nvidia-libs

# Register the internal CUDA repository for dnf.
# The single quotes keep "$basearch" literal in the repo file; dnf expands it
# when it reads the repo definition.
# NOTE(review): gpgcheck and repo_gpgcheck are disabled for this repo, so
# package signatures are not verified here - confirm this is intentional.
RUN printf '%s\n' \
      '[ml-$basearch-baseos-rpms]' \
      'name=cuda-rhel-9 repo' \
      'baseurl=https://internal.console.redhat.com/api/pulp-content/rhel-ai/cuda-rhel-9-x86_64/' \
      'enabled=1' \
      'gpgcheck=0' \
      'repo_gpgcheck=0' \
      > /etc/yum.repos.d/cuda-rhel-9.repo

# NVIDIA driver version must match GPU Operator installation
# GPU Operator 25.10.1 uses driver 580.105.08 (see https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/release-notes.html)
ARG NVIDIA_DRIVER_VERSION=580.105.08

# Install libnvidia-ml package from internal CUDA repo
# Version must match konflux/rpms.lock.yaml for hermetic builds with cachi2
# Enable the nvidia-driver module stream to avoid modular filtering. The stream
# name is the major component of NVIDIA_DRIVER_VERSION (580 for 580.105.08), so
# the stream and the package version cannot drift apart when the ARG changes.
# After installing, verify that the installed RPM version is exactly the
# requested one and fail the build otherwise.
RUN NVIDIA_DRIVER_BRANCH="${NVIDIA_DRIVER_VERSION%%.*}" && \
    dnf module enable -y "nvidia-driver:${NVIDIA_DRIVER_BRANCH}" && \
    dnf install --nogpgcheck --setopt=repo_gpgcheck=0 -y "libnvidia-ml-${NVIDIA_DRIVER_VERSION}-1.el9" && \
    dnf clean all && \
    INSTALLED_VERSION=$(rpm -q --queryformat '%{VERSION}' libnvidia-ml) && \
    if [ "$INSTALLED_VERSION" != "${NVIDIA_DRIVER_VERSION}" ]; then \
      echo "ERROR: Installed libnvidia-ml version ($INSTALLED_VERSION) does not match expected version (${NVIDIA_DRIVER_VERSION})" >&2; \
      exit 1; \
    fi && \
    echo "SUCCESS: Verified libnvidia-ml version ${INSTALLED_VERSION}"

# Final stage - use clean UBI9 minimal to avoid CVEs from NVIDIA base image (CVE-2025-66471, CVE-2026-21441)
FROM registry.redhat.io/ubi9/ubi-minimal@sha256:6fc28bcb6776e387d7a35a2056d9d2b985dc4e26031e98a2bd35a7137cd6fd71

# Copy NVIDIA libraries from build stage and create symlink for go-nvml
COPY --from=nvidia-libs /usr/lib64/libnvidia-ml.so* /usr/lib64/
# -f keeps this step from failing with "File exists" if the glob above already
# brought an unversioned libnvidia-ml.so into /usr/lib64.
RUN ln -sf /usr/lib64/libnvidia-ml.so.1 /usr/lib64/libnvidia-ml.so

ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
Expand Down
6 changes: 6 additions & 0 deletions konflux/ml.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# dnf repository definition for the internal cuda-rhel-9 package repository.
# Listed under contentOrigin.repofiles in konflux/rpms.in.yaml so that
# libnvidia-ml can be resolved into konflux/rpms.lock.yaml.
# NOTE(review): the repo id uses $basearch but baseurl hard-codes x86_64;
# presumably fine while rpms.in.yaml only lists arches: [x86_64] - confirm
# before adding another architecture.
[ml-$basearch-baseos-rpms]
name = cuda-rhel-9 repo
baseurl = https://internal.console.redhat.com/api/pulp-content/rhel-ai/cuda-rhel-9-x86_64/
enabled = 1
# NOTE(review): both package and repo-metadata GPG checks are disabled for
# this repo - confirm this is intentional.
gpgcheck = 0
repo_gpgcheck = 0
8 changes: 6 additions & 2 deletions konflux/rpms.in.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
packages: [jq]
packages:
- jq
- libnvidia-ml
modules:
- nvidia-driver:580
contentOrigin:
repofiles: ["./ubi.repo"]
repofiles: ["./ubi.repo", "./ml.repo"]
arches: [x86_64]
30 changes: 22 additions & 8 deletions konflux/rpms.lock.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,33 @@ lockfileVendor: redhat
arches:
- arch: x86_64
packages:
- url: https://cdn-ubi.redhat.com/content/public/ubi/dist/ubi9/9/x86_64/appstream/os/Packages/j/jq-1.6-15.el9.x86_64.rpm
repoid: ubi-9-for-x86_64-appstream-rpms
size: 194271
checksum: sha256:d3157267cce88006c2ad3327ea7eb8983bea6f69327c157228b89814a3c473ae
name: jq
evr: 1.6-15.el9
sourcerpm: jq-1.6-15.el9.src.rpm
- url: https://cdn-ubi.redhat.com/content/public/ubi/dist/ubi9/9/x86_64/appstream/os/Packages/o/oniguruma-6.9.6-1.el9.5.x86_64.rpm
repoid: ubi-9-for-x86_64-appstream-rpms
size: 226331
checksum: sha256:6c884cc2216e5b4699ebd8cde27b39e99532520b367f645ed6cc660d081916dc
name: oniguruma
evr: 6.9.6-1.el9.5
sourcerpm: oniguruma-6.9.6-1.el9.5.src.rpm
- url: https://cdn-ubi.redhat.com/content/public/ubi/dist/ubi9/9/x86_64/baseos/os/Packages/j/jq-1.6-19.el9.x86_64.rpm
repoid: ubi-9-for-x86_64-baseos-rpms
size: 191662
checksum: sha256:6b4d82714813d7b4a3200bf2856a3c1493d186e9caa916d7a700ec25e4996462
name: jq
evr: 1.6-19.el9
sourcerpm: jq-1.6-19.el9.src.rpm
# NVIDIA driver version must match GPU Operator 25.10.1 (driver 580.105.08)
# See NVIDIA_DRIVER_VERSION in Dockerfile.daemonset.ocp
- url: https://internal.console.redhat.com/api/pulp-content/rhel-ai/cuda-rhel-9-x86_64/Packages/l/libnvidia-ml-580.105.08-1.el9.x86_64.rpm
repoid: ml-x86_64-baseos-rpms
size: 685446
checksum: sha256:b261f01cf5bd6cf22c3624e53a6d1df7a20a76bd6a7b2b917c607a153c69dcb1
name: libnvidia-ml
evr: 3:580.105.08-1.el9
sourcerpm: nvidia-driver-580.105.08-1.el9.src.rpm
module: nvidia-driver:580
source: []
module_metadata: []
module_metadata:
- url: https://internal.console.redhat.com/api/pulp-content/rhel-ai/cuda-rhel-9-x86_64/repodata/1fa7d4f1fec7de205bb2157b34470bd56918ad67c42e386048a76b169a8b8e57-modules.yaml
repoid: ml-x86_64-baseos-rpms
size: 5394607
checksum: sha256:1fa7d4f1fec7de205bb2157b34470bd56918ad67c42e386048a76b169a8b8e57