-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathDockerfile.worker
More file actions
171 lines (151 loc) · 9.54 KB
/
Dockerfile.worker
File metadata and controls
171 lines (151 loc) · 9.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Dockerfile.worker — builds cmd/duckgres-worker, the DuckDB-service-only
# binary. Pinned to a single DuckDB driver version per build via the
# DUCKDB_GO_VERSION / DUCKDB_BINDINGS_VERSION / DUCKDB_EXTENSION_VERSION /
# HTTPFS_EXTENSION_TAG build args. The matrix-build CD workflow produces
# one image per (DuckDB version × arch).
#
# The control plane spawns these images as worker pods; their PG wire
# surface is the all-in-one duckgres binary (or the CP-only binary in
# cmd/duckgres-controlplane), which routes queries to whichever worker
# image the per-tenant `image` config-store column points at.
FROM golang:1.25-bookworm AS builder
# Builder-stage OS packages:
#   gcc / g++ / libc6-dev — C/C++ toolchain for the CGO_ENABLED=1 `go build`
#   curl / gzip           — fetch and decompress the bundled DuckDB extensions
# The apt lists are removed in the same layer so they never persist.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
         curl \
         g++ \
         gcc \
         gzip \
         libc6-dev \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /build
# DuckDB driver pin. Both args default to empty, meaning "build whatever the
# repo's go.mod is currently tracking"; override them via --build-arg when the
# matrix produces an image for a specific DuckDB minor version. The
# duckdb-go-bindings version must move in lock-step with duckdb-go/v2 (the
# duckdb-go-bindings release stream uses the same numeric encoding as
# duckdb-go/v2, just without the v2 prefix), so the two args are only
# meaningful as a pair: set both, or neither.
ARG DUCKDB_GO_VERSION=
ARG DUCKDB_BINDINGS_VERSION=
ARG TARGETARCH
# Copy ONLY the module files first and apply the per-build DuckDB pin, so the
# module download + extension fetch below sit in layers keyed solely on
# go.mod/go.sum + the pin args. On a source-only PR these cache-hit and the
# final `go build` recompiles only changed first-party code.
#
# IMPORTANT: do NOT run `go mod tidy` here. With only go.mod/go.sum present
# (no .go files), tidy prunes EVERY require directive — it sees zero imports —
# leaving an empty go.mod that breaks the build. `go get pkg@ver` explicitly
# records the require regardless of imports, so it pins correctly without
# source. tidy runs after COPY . ., once the real imports are visible.
COPY go.mod go.sum ./
# Three cases:
#   both args set   -> pin duckdb-go/v2 and duckdb-go-bindings; a failure of
#                      either `go get` fails the build. The per-platform
#                      lib/<os>-<arch> modules are pinned best-effort: their
#                      errors are discarded (`2>/dev/null || true`).
#   exactly one set -> fail fast. Previously this case silently skipped the pin
#                      and built against the repo-default DuckDB version.
#   neither set     -> no-op; go.mod is used as committed.
RUN if [ -n "$DUCKDB_GO_VERSION" ] && [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \
      go get "github.com/duckdb/duckdb-go/v2@${DUCKDB_GO_VERSION}" \
      && go get "github.com/duckdb/duckdb-go-bindings@${DUCKDB_BINDINGS_VERSION}" \
      && for arch in darwin-arm64 darwin-amd64 linux-arm64 linux-amd64 windows-amd64; do \
           go get "github.com/duckdb/duckdb-go-bindings/lib/${arch}@${DUCKDB_BINDINGS_VERSION}" 2>/dev/null || true; \
         done ; \
    elif [ -n "${DUCKDB_GO_VERSION}${DUCKDB_BINDINGS_VERSION}" ]; then \
      echo "ERROR: DUCKDB_GO_VERSION and DUCKDB_BINDINGS_VERSION must be set together (got DUCKDB_GO_VERSION='${DUCKDB_GO_VERSION}', DUCKDB_BINDINGS_VERSION='${DUCKDB_BINDINGS_VERSION}')" >&2 ; \
      exit 1 ; \
    fi
RUN go mod download
# Bundled DuckDB extensions. Everything extension-related lives BEFORE
# `COPY . .` so these layers depend only on the extension/pin args, not on
# source — a source-only PR keeps the GHA layer-cache hit and skips the 5
# downloads. (They previously ran after the source COPY + build, so they
# re-fetched on every edit.)
ARG DUCKDB_EXTENSION_VERSION=1.5.3
ARG HTTPFS_EXTENSION_TAG=v1.5.3-stoi-fix
ARG DUCKLAKE_EXTENSION_TAG=v1.0-posthog.4
ARG DUCKDB_EXTENSION_REPOSITORY=https://extensions.duckdb.org
# postgres_scanner gets its own repository knob. It defaults to the stable
# extensions repo and can be overridden per-row in CI (e.g. legacy DuckDB
# versions may need the nightly repo to match what was previously published).
ARG POSTGRES_SCANNER_REPOSITORY=https://extensions.duckdb.org
# Consistency gate: DUCKDB_EXTENSION_VERSION (which keys the bundled-extension
# directory layout) must describe the same DuckDB release as
# DUCKDB_BINDINGS_VERSION. The bindings encoding is
# `v0.<major><minor:02d><patch:02d>.0`, so 1.5.3 ↔ v0.10503.0.
# Why it matters: a CI matrix slip (e.g. extension_version=1.5.3 while
# bindings=v0.10502.0) would seed extensions under v1.5.3/ for a 1.5.2 engine
# that looks under v1.5.2/, silently falling back to upstream
# extensions.duckdb.org for httpfs/ducklake — the bundled PostHog forks would
# never load.
# The check is skipped when either variable is empty (no bindings pin given).
RUN [ -z "$DUCKDB_BINDINGS_VERSION" ] || [ -z "$DUCKDB_EXTENSION_VERSION" ] || { \
      ext_major="${DUCKDB_EXTENSION_VERSION%%.*}" ; \
      ext_tail="${DUCKDB_EXTENSION_VERSION#*.}" ; \
      ext_minor="${ext_tail%%.*}" ; \
      ext_patch="${ext_tail##*.}" ; \
      want_bindings=$(printf 'v0.%d%02d%02d.0' "$ext_major" "$ext_minor" "$ext_patch") ; \
      [ "$want_bindings" = "$DUCKDB_BINDINGS_VERSION" ] || { \
        echo "ERROR: extension/bindings version mismatch — DUCKDB_EXTENSION_VERSION=$DUCKDB_EXTENSION_VERSION implies bindings $want_bindings, but got $DUCKDB_BINDINGS_VERSION" >&2 ; \
        exit 1 ; \
      } ; \
      echo "Verified: DUCKDB_EXTENSION_VERSION=$DUCKDB_EXTENSION_VERSION matches DUCKDB_BINDINGS_VERSION=$DUCKDB_BINDINGS_VERSION" ; \
    }
# Fetch the five bundled extensions into
# /build/duckdb-extensions/v<DUCKDB_EXTENSION_VERSION>/linux_<TARGETARCH>/.
#
# Input assertions: `: "${VAR:?msg}"` aborts the step when VAR is unset or
# empty. For the ARGs that carry defaults this catches a CI matrix row that
# passes an EMPTY value (e.g. `--build-arg HTTPFS_EXTENSION_TAG=` from an
# unset matrix variable); an omitted --build-arg simply takes the ARG default
# and is not detected here. TARGETARCH has no default — it is expected to be
# supplied by the builder — and is asserted too, because every path and URL
# below embeds it.
#
# Failure handling: httpfs and ducklake are plain `curl -f ... -o` downloads,
# so an HTTP error breaks the && chain directly. json, postgres_scanner and
# iceberg are `curl | gunzip` pipelines; without pipefail the pipeline status
# is gunzip's, so a failed curl is only noticed when gunzip rejects the empty
# or truncated stream. The trailing per-file `[ -s ... ]` check is the
# backstop that rejects any zero-length result. (`set -o pipefail` would be
# cleaner but /bin/sh here is dash, which rejects -o pipefail.)
RUN : "${DUCKDB_EXTENSION_VERSION:?must be set}" \
    && : "${HTTPFS_EXTENSION_TAG:?must be set}" \
    && : "${DUCKLAKE_EXTENSION_TAG:?must be set}" \
    && : "${DUCKDB_EXTENSION_REPOSITORY:?must be set}" \
    && : "${POSTGRES_SCANNER_REPOSITORY:?must be set}" \
    && : "${TARGETARCH:?must be set}" \
    && ext_dir="/build/duckdb-extensions/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}" \
    && mkdir -p "$ext_dir" \
    && curl -fsSL "https://github.com/PostHog/duckdb-httpfs/releases/download/${HTTPFS_EXTENSION_TAG}/httpfs-linux-${TARGETARCH}.duckdb_extension" \
         -o "$ext_dir/httpfs.duckdb_extension" \
    && curl -fsSL "https://github.com/PostHog/ducklake/releases/download/${DUCKLAKE_EXTENSION_TAG}/ducklake-linux-${TARGETARCH}.duckdb_extension" \
         -o "$ext_dir/ducklake.duckdb_extension" \
    && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/json.duckdb_extension.gz" \
         | gunzip > "$ext_dir/json.duckdb_extension" \
    && curl -fsSL "${POSTGRES_SCANNER_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/postgres_scanner.duckdb_extension.gz" \
         | gunzip > "$ext_dir/postgres_scanner.duckdb_extension" \
    && curl -fsSL "${DUCKDB_EXTENSION_REPOSITORY}/v${DUCKDB_EXTENSION_VERSION}/linux_${TARGETARCH}/iceberg.duckdb_extension.gz" \
         | gunzip > "$ext_dir/iceberg.duckdb_extension" \
    && for f in httpfs ducklake json postgres_scanner iceberg; do \
         [ -s "$ext_dir/$f.duckdb_extension" ] \
           || { echo "ERROR: $f.duckdb_extension is empty after fetch" >&2; exit 1; }; \
       done
COPY . .
# The COPY above puts the host's (unpinned) go.mod/go.sum back on top of the
# pinned copies. Without re-pinning, the build would link the repo-default
# DuckDB version regardless of the matrix row (the real bug the bindings-pin
# verify step after the build guards against). The source — and therefore the
# real import set — is present now, so `go mod tidy` keeps every needed
# require instead of pruning it. The module cache from the pre-COPY layers
# stays valid: same versions, so `go get` is a fast metadata no-op.
#
# `set -e` makes any failing command abort the step; only the per-platform
# lib/<os>-<arch> pins are best-effort (errors discarded via `|| true`).
# When either pin arg is empty the whole step is a no-op.
RUN if [ -n "$DUCKDB_GO_VERSION" ] && [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \
      set -e ; \
      go get "github.com/duckdb/duckdb-go/v2@${DUCKDB_GO_VERSION}" ; \
      go get "github.com/duckdb/duckdb-go-bindings@${DUCKDB_BINDINGS_VERSION}" ; \
      for arch in darwin-arm64 darwin-amd64 linux-arm64 linux-amd64 windows-amd64; do \
        go get "github.com/duckdb/duckdb-go-bindings/lib/${arch}@${DUCKDB_BINDINGS_VERSION}" 2>/dev/null || true ; \
      done ; \
      go mod tidy ; \
    fi
# Build metadata stamped into the binary through -ldflags -X (main.version,
# main.commit, main.date). Declared this late so that changing them only
# invalidates the compile layer, not the dependency/extension layers above.
ARG VERSION=dev
ARG COMMIT=unknown
ARG BUILD_TAGS=""
# CGO is enabled for this build; main.date is the UTC build time.
RUN build_date="$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    && CGO_ENABLED=1 go build \
         -tags "${BUILD_TAGS}" \
         -ldflags "-X main.version=${VERSION} -X main.commit=${COMMIT} -X main.date=${build_date}" \
         -o duckgres-worker \
         ./cmd/duckgres-worker
# Post-build guard (defense-in-depth): confirm the binary really links the
# duckdb-go-bindings version requested via build args. Should a later
# Dockerfile change break the pinning again, the build fails here rather than
# shipping a silently-wrong image. DUCKDB_BINDINGS_VERSION encodes the DuckDB
# minor (e.g. v0.10501.0 -> DuckDB 1.5.1); the module version embedded in the
# binary, as reported by `go version -m`, must match it exactly. The step is
# skipped when no bindings pin was given.
RUN if [ -n "$DUCKDB_BINDINGS_VERSION" ]; then \
      linked=$(go version -m ./duckgres-worker | awk '$2 == "github.com/duckdb/duckdb-go-bindings" { print $3 }') ; \
      if [ "$linked" = "$DUCKDB_BINDINGS_VERSION" ]; then \
        echo "Verified: duckgres-worker linked against duckdb-go-bindings@$linked" ; \
      else \
        echo "ERROR: bindings pin mismatch — wanted $DUCKDB_BINDINGS_VERSION, got $linked" >&2 ; \
        echo " (full embedded module info follows)" >&2 ; \
        go version -m ./duckgres-worker | grep duckdb >&2 ; \
        exit 1 ; \
      fi ; \
    fi
# Runtime stage: slim Debian with CA certificates, the worker binary, and the
# bundled extensions, running as the unprivileged `duckgres` user.
FROM debian:bookworm-slim
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# NOTE(review): `-r` lets the OS pick the UID/GID and USER below is a name,
# not a number. If the pods run with Kubernetes `runAsNonRoot` and no explicit
# runAsUser, a fixed numeric UID may be needed — confirm against the pod spec.
RUN groupadd -r duckgres && useradd -r -g duckgres -d /app duckgres
WORKDIR /app
# Create and chown the writable directories BEFORE the artifacts are copied,
# then let COPY --chown assign ownership of the artifacts themselves. A
# `chown -R /app` after the COPYs would rewrite the binary and every extension
# into an additional layer, storing those bytes twice in the image. The final
# ownership is the same: everything under /app belongs to duckgres:duckgres.
RUN mkdir -p data certs && chown duckgres:duckgres /app data certs
COPY --from=builder --chown=duckgres:duckgres /build/duckgres-worker .
COPY --from=builder --chown=duckgres:duckgres /build/duckdb-extensions ./extensions
USER duckgres
# 8816 = Arrow Flight SQL listener (configurable via --duckdb-listen)
# 9090 = metrics. The CP-side PG wire port (5432) is intentionally absent;
# this binary does not serve PG wire.
EXPOSE 8816 9090
ENTRYPOINT ["/app/duckgres-worker"]