Commit f02cd3e

Merge pull request #7 from FootprintAI/feat--support-cuda-vulkan
feat: add prebuilt CUDA and Vulkan static libraries
2 parents 99e2aca + c7af93e commit f02cd3e

43 files changed

Lines changed: 261 additions & 24 deletions

(Not every changed file is shown below; large commits have some content hidden by default.)

Dockerfile.libs

Lines changed: 163 additions & 18 deletions
@@ -1,17 +1,28 @@
 # Dockerfile.libs — build linux-amd64 static libraries for llama.cpp and whisper.cpp
 #
-# Usage:
+# Usage (CPU, default):
 #   docker build -f Dockerfile.libs -o ./out .
 #
+# Usage (CUDA):
+#   docker build -f Dockerfile.libs --build-arg GPU_BACKEND=cuda -o ./out .
+#
+# Usage (Vulkan):
+#   docker build -f Dockerfile.libs --build-arg GPU_BACKEND=vulkan -o ./out .
+#
 # Build + link test (ensures .a files link correctly):
 #   docker build -f Dockerfile.libs --target build-test .
 #
 # This extracts prebuilt .a files + headers into ./out/ on the host.
 
-FROM golang:1.24-bookworm AS builder
+ARG GPU_BACKEND=cpu
+
+# ============================================================================
+# Stage: Download sources (shared by all backends)
+# ============================================================================
+FROM golang:1.24-bookworm AS sources
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential cmake wget && \
+    wget && \
     rm -rf /var/lib/apt/lists/*
 
 WORKDIR /src
@@ -37,12 +48,25 @@ RUN WHISPER_VERSION=$(go run ./cmd/versioncmd whisper.cpp) && \
     tar xzf whisper.cpp.tar.gz --strip-components=1 -C whisper-src && \
     rm whisper.cpp.tar.gz
 
-# Build llama.cpp
+# ============================================================================
+# Builder: CPU (default)
+# ============================================================================
+FROM golang:1.24-bookworm AS builder-cpu
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+COPY --from=sources /src/llama-src llama-src
+COPY --from=sources /src/whisper-src whisper-src
+
+# Build llama.cpp (CPU)
 RUN cd llama-src && \
     cmake -B build -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release -j$(nproc)
 
-# Build whisper.cpp
+# Build whisper.cpp (CPU)
 RUN cd whisper-src && \
     cmake -B build -DBUILD_SHARED_LIBS=OFF && \
     cmake --build build --config Release -j$(nproc)
@@ -62,36 +86,157 @@ RUN mkdir -p /out/whisper.cpp/linux-amd64 /out/whisper.cpp/include /out/whisper.
     cp whisper-src/ggml/include/*.h /out/whisper.cpp/ggml/include/
 
 # ============================================================================
-# Build test — verifies the .a files link correctly with Go CGO
+# Builder: CUDA
 # ============================================================================
-FROM golang:1.24-bookworm AS build-test
+FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 AS builder-cuda
 
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake wget && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+COPY --from=sources /src/llama-src llama-src
+COPY --from=sources /src/whisper-src whisper-src
+
+# Build llama.cpp (CUDA)
+RUN cd llama-src && \
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON && \
+    cmake --build build --config Release -j$(nproc)
+
+# Build whisper.cpp (CUDA)
+RUN cd whisper-src && \
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON && \
+    cmake --build build --config Release -j$(nproc)
+
+# Collect llama.cpp artifacts (CUDA variant)
+RUN mkdir -p /out/llama.cpp/linux-amd64-cuda /out/llama.cpp/include /out/llama.cpp/ggml/include /out/llama.cpp/common && \
+    find llama-src/build -name "*.a" -exec cp {} /out/llama.cpp/linux-amd64-cuda/ \; && \
+    cp llama-src/include/*.h /out/llama.cpp/include/ && \
+    cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
+    cp llama-src/common/common.h /out/llama.cpp/common/ && \
+    cp llama-src/common/sampling.h /out/llama.cpp/common/
+
+# Collect whisper.cpp artifacts (CUDA variant)
+RUN mkdir -p /out/whisper.cpp/linux-amd64-cuda /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
+    find whisper-src/build -name "*.a" -exec cp {} /out/whisper.cpp/linux-amd64-cuda/ \; && \
+    cp whisper-src/include/*.h /out/whisper.cpp/include/ && \
+    cp whisper-src/ggml/include/*.h /out/whisper.cpp/ggml/include/
+
+# ============================================================================
+# Builder: Vulkan
+# ============================================================================
+# Use Ubuntu 24.04 for Vulkan — bookworm's Vulkan 1.3.239 is too old
+# (llama.cpp b8220+ needs VK_EXT_layer_settings from Vulkan 1.3.261+)
+FROM ubuntu:24.04 AS builder-vulkan
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential cmake wget ca-certificates libvulkan-dev glslang-tools glslc && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /src
+COPY --from=sources /src/llama-src llama-src
+COPY --from=sources /src/whisper-src whisper-src
+
+# Build llama.cpp (Vulkan)
+RUN cd llama-src && \
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_VULKAN=ON && \
+    cmake --build build --config Release -j$(nproc)
+
+# Build whisper.cpp (Vulkan)
+RUN cd whisper-src && \
+    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_VULKAN=ON && \
+    cmake --build build --config Release -j$(nproc)
+
+# Collect llama.cpp artifacts (Vulkan variant)
+RUN mkdir -p /out/llama.cpp/linux-amd64-vulkan /out/llama.cpp/include /out/llama.cpp/ggml/include /out/llama.cpp/common && \
+    find llama-src/build -name "*.a" -exec cp {} /out/llama.cpp/linux-amd64-vulkan/ \; && \
+    cp llama-src/include/*.h /out/llama.cpp/include/ && \
+    cp llama-src/ggml/include/*.h /out/llama.cpp/ggml/include/ && \
+    cp llama-src/common/common.h /out/llama.cpp/common/ && \
+    cp llama-src/common/sampling.h /out/llama.cpp/common/
+
+# Collect whisper.cpp artifacts (Vulkan variant)
+RUN mkdir -p /out/whisper.cpp/linux-amd64-vulkan /out/whisper.cpp/include /out/whisper.cpp/ggml/include && \
+    find whisper-src/build -name "*.a" -exec cp {} /out/whisper.cpp/linux-amd64-vulkan/ \; && \
+    cp whisper-src/include/*.h /out/whisper.cpp/include/ && \
+    cp whisper-src/ggml/include/*.h /out/whisper.cpp/ggml/include/
+
+# ============================================================================
+# Dynamic backend selection — picks the right builder stage
+# ============================================================================
+FROM builder-${GPU_BACKEND} AS builder
+
+# ============================================================================
+# Build test bases — provide the right link libraries per backend
+# ============================================================================
+FROM golang:1.24-bookworm AS build-test-base-cpu
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential libgomp1 && \
     rm -rf /var/lib/apt/lists/*
 
+FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 AS build-test-base-cuda
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential libgomp1 wget && \
+    rm -rf /var/lib/apt/lists/* && \
+    wget -qO go.tar.gz https://go.dev/dl/go1.24.6.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go.tar.gz && rm go.tar.gz
+ENV PATH="/usr/local/go/bin:${PATH}"
+
+FROM ubuntu:24.04 AS build-test-base-vulkan
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential libgomp1 libvulkan-dev wget ca-certificates && \
+    rm -rf /var/lib/apt/lists/* && \
+    wget -qO go.tar.gz https://go.dev/dl/go1.24.6.linux-amd64.tar.gz && \
+    tar -C /usr/local -xzf go.tar.gz && rm go.tar.gz
+ENV PATH="/usr/local/go/bin:${PATH}"
+
+# ============================================================================
+# Build test — verifies the .a files link correctly with Go CGO
+# ============================================================================
+FROM build-test-base-${GPU_BACKEND} AS build-test
+
+ARG GPU_BACKEND=cpu
+
 WORKDIR /src
 COPY . .
 
+# Determine prebuilt directory suffix based on GPU_BACKEND
+RUN SUFFIX="" && \
+    if [ "$GPU_BACKEND" = "cuda" ]; then SUFFIX="-cuda"; fi && \
+    if [ "$GPU_BACKEND" = "vulkan" ]; then SUFFIX="-vulkan"; fi && \
+    echo "Using prebuilt suffix: linux-amd64${SUFFIX}"
+
 # Copy freshly built .a files into the source tree
-COPY --from=builder /out/llama.cpp/linux-amd64/ /src/ggml/llamacpp/third_party/prebuilt/linux-amd64/
+COPY --from=builder /out/llama.cpp/linux-amd64*/ /tmp/llama-libs/
 COPY --from=builder /out/llama.cpp/include/ /src/ggml/llamacpp/third_party/include/
 COPY --from=builder /out/llama.cpp/ggml/include/ /src/ggml/llamacpp/third_party/ggml/include/
 COPY --from=builder /out/llama.cpp/common/ /src/ggml/llamacpp/third_party/common/
-COPY --from=builder /out/whisper.cpp/linux-amd64/ /src/ggml/whispercpp/third_party/prebuilt/linux-amd64/
+COPY --from=builder /out/whisper.cpp/linux-amd64*/ /tmp/whisper-libs/
 COPY --from=builder /out/whisper.cpp/include/ /src/ggml/whispercpp/third_party/include/
 COPY --from=builder /out/whisper.cpp/ggml/include/ /src/ggml/whispercpp/third_party/ggml/include/
 
-# Test stub builds (no CGO)
-RUN CGO_ENABLED=0 go build ./ggml/llamacpp/... && \
+# Copy .a files to correct prebuilt directory based on GPU_BACKEND
+RUN SUFFIX="" && \
+    if [ "$GPU_BACKEND" = "cuda" ]; then SUFFIX="-cuda"; fi && \
+    if [ "$GPU_BACKEND" = "vulkan" ]; then SUFFIX="-vulkan"; fi && \
+    mkdir -p /src/ggml/llamacpp/third_party/prebuilt/linux-amd64${SUFFIX} && \
+    mkdir -p /src/ggml/whispercpp/third_party/prebuilt/linux-amd64${SUFFIX} && \
+    cp /tmp/llama-libs/*.a /src/ggml/llamacpp/third_party/prebuilt/linux-amd64${SUFFIX}/ && \
+    cp /tmp/whisper-libs/*.a /src/ggml/whispercpp/third_party/prebuilt/linux-amd64${SUFFIX}/
+
+# Determine build tags based on GPU_BACKEND
+RUN LLAMA_TAGS="llamacpp" && \
+    WHISPER_TAGS="whispercpp" && \
+    if [ "$GPU_BACKEND" = "cuda" ]; then LLAMA_TAGS="llamacpp,cuda"; WHISPER_TAGS="whispercpp,cuda"; fi && \
+    if [ "$GPU_BACKEND" = "vulkan" ]; then LLAMA_TAGS="llamacpp,vulkan"; WHISPER_TAGS="whispercpp,vulkan"; fi && \
+    echo "Build tags: llama=${LLAMA_TAGS} whisper=${WHISPER_TAGS}" && \
+    CGO_ENABLED=0 go build ./ggml/llamacpp/... && \
     CGO_ENABLED=0 go build ./ggml/whispercpp/... && \
-    echo "stub builds OK"
-
-# Test CGO builds (link against .a files)
-RUN CGO_ENABLED=1 go build -tags llamacpp ./ggml/llamacpp/... && \
-    echo "llamacpp CGO build OK"
-RUN CGO_ENABLED=1 go build -tags whispercpp ./ggml/whispercpp/... && \
-    echo "whispercpp CGO build OK"
+    echo "stub builds OK" && \
+    CGO_ENABLED=1 go build -tags "${LLAMA_TAGS}" ./ggml/llamacpp/... && \
+    echo "llamacpp CGO build OK (${LLAMA_TAGS})" && \
+    CGO_ENABLED=1 go build -tags "${WHISPER_TAGS}" ./ggml/whispercpp/... && \
+    echo "whispercpp CGO build OK (${WHISPER_TAGS})"
 
 # Run stub tests
 RUN CGO_ENABLED=0 go test ./ggml/llamacpp/... && \
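
The usage comments at the top of the file show the link test only for the default backend; the same test can be exercised per backend by combining --target build-test with the GPU_BACKEND build argument. A minimal sketch (not spelled out verbatim in the Dockerfile, but it follows from the build-test-base-${GPU_BACKEND} stage selection above):

docker build -f Dockerfile.libs --target build-test .                                # CPU (default)
docker build -f Dockerfile.libs --build-arg GPU_BACKEND=cuda --target build-test .
docker build -f Dockerfile.libs --build-arg GPU_BACKEND=vulkan --target build-test .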

ggml/llamacpp/embed.go

Lines changed: 2 additions & 0 deletions
@@ -13,5 +13,7 @@ import "embed"
 //go:embed third_party/common/*.h
 //go:embed third_party/prebuilt/darwin-amd64/*.a
 //go:embed third_party/prebuilt/linux-amd64/*.a
+//go:embed third_party/prebuilt/linux-amd64-cuda/*.a
+//go:embed third_party/prebuilt/linux-amd64-vulkan/*.a
 //go:embed third_party/prebuilt/linux-arm64/*.a
 var _ embed.FS
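
The blank embed.FS declaration serves to pull the prebuilt archives into the published Go module so that downstream CGO builds can find them under ${SRCDIR}/third_party/prebuilt/; the two new directives extend that to the CUDA and Vulkan directories. A quick way to see which files the patterns match, assuming the repository root as working directory and no build constraints on embed.go beyond what this hunk shows:

go list -f '{{range .EmbedFiles}}{{println .}}{{end}}' ./ggml/llamacpp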

ggml/llamacpp/gpu_conflict.go

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+//go:build cuda && vulkan
+
+// Copyright 2025 FootprintAI
+// SPDX-License-Identifier: Apache-2.0
+
+package llamacpp
+
+// cuda and vulkan build tags are mutually exclusive.
+// Setting both will produce a compile error.
+var _ int = "cuda and vulkan are mutually exclusive"
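
This file is compiled only when both the cuda and vulkan tags are set, and the string-to-int assignment then fails type checking, so the conflicting combination is rejected at compile time instead of producing contradictory linker flags. A sketch of the behaviour (the passing commands mirror the Dockerfile's build-test stage and also need the matching toolchain available at link time; the exact compiler message may vary by Go version):

CGO_ENABLED=1 go build -tags "llamacpp,cuda" ./ggml/llamacpp/...      # builds
CGO_ENABLED=1 go build -tags "llamacpp,vulkan" ./ggml/llamacpp/...    # builds
CGO_ENABLED=1 go build -tags "llamacpp,cuda,vulkan" ./ggml/llamacpp/...
# error in gpu_conflict.go: cannot use "cuda and vulkan are mutually exclusive" (untyped string constant) as int value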

ggml/llamacpp/llamacpp.go

Lines changed: 0 additions & 3 deletions
@@ -12,10 +12,7 @@ package llamacpp
 #cgo CXXFLAGS: -std=c++17 -I${SRCDIR}/third_party/include -I${SRCDIR}/third_party/ggml/include -I${SRCDIR}/third_party/common
 #cgo darwin,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-arm64
 #cgo darwin,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/darwin-amd64
-#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64
-#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-arm64
 #cgo darwin LDFLAGS: -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-blas -lggml-metal -L/usr/local/opt/libomp/lib -L/opt/homebrew/opt/libomp/lib -lomp -framework Accelerate -framework Metal -framework Foundation -lstdc++ -lm
-#cgo linux LDFLAGS: -Wl,--start-group -lcommon -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -lpthread -ldl -lrt -lgomp
 #include <stdlib.h>
 #include <stdbool.h>
 #include "wrapper.h"

ggml/llamacpp/llamacpp_linux.go

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+//go:build llamacpp && linux && !cuda && !vulkan
+
+// Copyright 2025 FootprintAI
+// SPDX-License-Identifier: Apache-2.0
+
+package llamacpp
+
+/*
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64
+#cgo linux,arm64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-arm64
+#cgo linux LDFLAGS: -Wl,--start-group -lcommon -lllama -lggml-cpu -lggml-base -lggml -Wl,--end-group -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+*/
+import "C"
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+//go:build llamacpp && linux && cuda
+
+// Copyright 2025 FootprintAI
+// SPDX-License-Identifier: Apache-2.0
+
+package llamacpp
+
+/*
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64-cuda
+#cgo linux LDFLAGS: -Wl,--start-group -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-cuda -Wl,--end-group -lcudart -lcublas -lcublasLt -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+*/
+import "C"
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+//go:build llamacpp && linux && vulkan
+
+// Copyright 2025 FootprintAI
+// SPDX-License-Identifier: Apache-2.0
+
+package llamacpp
+
+/*
+#cgo linux,amd64 LDFLAGS: -L${SRCDIR}/third_party/prebuilt/linux-amd64-vulkan
+#cgo linux LDFLAGS: -Wl,--start-group -lcommon -lllama -lggml-cpu -lggml-base -lggml -lggml-vulkan -Wl,--end-group -lvulkan -lstdc++ -lm -lpthread -ldl -lrt -lgomp
+*/
+import "C"
Binary files not shown (prebuilt .a archives; one entry listed at 10.8 MB).
