Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,21 +1,63 @@
{%- set dev_mode = dev_mode | default(false) -%}
{%- set config_variant = config_variant | default('gpu') -%}

{%- set worker_service_base = 'presto-native-worker-gpu-dev' if dev_mode else 'presto-native-worker-gpu' -%}
{%- set worker_image = worker_service_base ~ ':${PRESTO_IMAGE_TAG:?PRESTO_IMAGE_TAG must be set}' -%}
{%- set config_root = '../../config/generated/' ~ config_variant -%}

{%- set dev_env_lines = [
'HOME: /workspace/home',
'PRESTO_BUILD_TYPE: ${PRESTO_BUILD_TYPE:-RelWithDebInfo}',
'PRESTO_BUILD_DIR_NAME: ${PRESTO_BUILD_DIR_NAME:-relwithdebinfo}',
'PRESTO_REBUILD: ${PRESTO_REBUILD:-0}',
'PRESTO_FORCE_REBUILD: ${PRESTO_FORCE_REBUILD:-0}',
'PRESTO_SKIP_SERVER: ${PRESTO_SKIP_SERVER:-0}',
'PRESTO_NUM_THREADS: ${PRESTO_NUM_THREADS:-12}',
'CUDA_ARCHITECTURES: ${CUDA_ARCHITECTURES:-}',
] -%}

{%- set dev_source_mount_lines = [
'- ../../../../../presto:/workspace/presto:rw',
'- ../../../../../velox:/workspace/presto/presto-native-execution/velox:rw',
'- ${PRESTO_DEV_STATE_ROOT:-../devstate}:/workspace:rw',
] -%}

x-presto-native-worker-gpu: &gpu_worker_base
extends:
file: ../../docker-compose.common.yml
service: presto-base-native-worker
image: presto-native-worker-gpu:${PRESTO_IMAGE_TAG:?PRESTO_IMAGE_TAG must be set}
service: {% if dev_mode %}presto-base-volumes{% else %}presto-base-native-worker{% endif %}
image: {{ worker_image }}
{% if dev_mode %}
build:
# This compose is rendered into docker-compose/generated/, so we need one extra
# level of '..' compared to compose files living directly in presto/docker/.
# Target context is the common parent of velox-testing/, presto/, and velox/.
context: ../../../../../
dockerfile: velox-testing/presto/docker/native_dev_build.dockerfile
args:
- BASE_IMAGE=${DEPS_IMAGE:-presto/prestissimo-dependency:centos9-${USER}}
{% else %}
build:
args:
- GPU=ON
{% if sccache %}
{% if sccache %}
ulimits: {nofile: {soft: 500000, hard: 500000}}
secrets:
- github_token
- aws_credentials
{% endif %}
{% endif %}
{% endif %}
runtime: nvidia
cap_add:
{% if dev_mode %}
- SYS_PTRACE
{% endif %}
- IPC_LOCK
- SYS_ADMIN # Required for nsys GPU metrics profiling
{% if dev_mode %}
security_opt:
- seccomp=unconfined
{% endif %}
pid: host
ulimits:
memlock: -1
Expand Down Expand Up @@ -46,36 +88,46 @@ x-presto-native-worker-gpu: &gpu_worker_base
- presto-coordinator
privileged: true
volumes:
# required for numa
# required for numa
- /sys/devices/system/node:/sys/devices/system/node
- ../../config/generated/gpu/etc_common:/opt/presto-server/etc
- ../../config/generated/gpu/etc_worker/node.properties:/opt/presto-server/etc/node.properties
- ../../config/generated/gpu/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties
- ../../config/generated/gpu/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties
- {{ config_root }}/etc_common:/opt/presto-server/etc
- {{ config_root }}/etc_worker/node.properties:/opt/presto-server/etc/node.properties
- {{ config_root }}/etc_worker/config_native.properties:/opt/presto-server/etc/config.properties
- {{ config_root }}/etc_worker/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties
{% if dev_mode %}
{% for line in dev_source_mount_lines %}
{{ line }}
{% endfor %}
{% endif %}

services:
presto-coordinator:
extends:
file: ../../docker-compose.common.yml
service: presto-base-coordinator
volumes:
- ../../config/generated/gpu/etc_common:/opt/presto-server/etc
- ../../config/generated/gpu/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties
- ../../config/generated/gpu/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties
- ../../config/generated/gpu/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties
- {{ config_root }}/etc_common:/opt/presto-server/etc
- {{ config_root }}/etc_coordinator/config_native.properties:/opt/presto-server/etc/config.properties
- {{ config_root }}/etc_coordinator/node.properties:/opt/presto-server/etc/node.properties
- {{ config_root }}/etc_coordinator/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties

{% if workers|length > 1 and not single_container %}
# Separate GPU workers - runs each worker in a separate container, pinned to a specific GPU
{% for gpu_id in workers %}
presto-native-worker-gpu-{{ gpu_id }}:
{{ worker_service_base }}-{{ gpu_id }}:
<<: *gpu_worker_base
container_name: presto-native-worker-gpu-{{ gpu_id }}
container_name: {{ worker_service_base }}-{{ gpu_id }}
environment:
WORKER_ID: {{ gpu_id }}
NVIDIA_VISIBLE_DEVICES: all
PROFILE: ${PROFILE:-}
PROFILE_ARGS: ${PROFILE_ARGS:-}
SERVER_START_TIMESTAMP: ${SERVER_START_TIMESTAMP:-}
{% if dev_mode %}
{% for line in dev_env_lines %}
{{ line }}
{% endfor %}
{% endif %}
UCX_LOG_LEVEL: info #debug
UCX_RNDV_PIPELINE_ERROR_HANDLING: y
UCX_TLS: tcp,cuda_copy,cuda_ipc
Expand All @@ -87,23 +139,39 @@ services:
KVIKIO_NTHREADS: {{ kvikio_threads }}
CUDA_VISIBLE_DEVICES: {{ gpu_id }}
volumes:
- ../../config/generated/gpu/etc_common:/opt/presto-server/etc
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc/node.properties
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc/config.properties
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties
# required for numa
- /sys/devices/system/node:/sys/devices/system/node
- {{ config_root }}/etc_common:/opt/presto-server/etc
- {{ config_root }}/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc/node.properties
- {{ config_root }}/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc/config.properties
- {{ config_root }}/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc/catalog/hive.properties
{% if dev_mode %}
{% for line in dev_source_mount_lines %}
{{ line }}
{% endfor %}
{% endif %}
{% endfor %}
{% else %}
# Combined GPU worker - runs {{ workers|length if workers else 1 }} presto servers in parallel, each pinned to a specific GPU
presto-native-worker-gpu:
{{ worker_service_base }}:
<<: *gpu_worker_base
container_name: presto-native-worker-gpu
container_name: {{ worker_service_base }}
{%- if workers %}
{% if dev_mode %}
command: ["bash", "/opt/launch_presto_server_dev.sh"{% for gpu_id in workers %}, "{{ gpu_id }}"{% endfor %}]
{% else %}
command: ["bash", "/opt/presto_profiling_wrapper.sh"{% for gpu_id in workers %}, "{{ gpu_id }}"{% endfor %}]
{% endif %}
environment:
NVIDIA_VISIBLE_DEVICES: all
PROFILE: ${PROFILE:-}
PROFILE_ARGS: ${PROFILE_ARGS:-}
SERVER_START_TIMESTAMP: ${SERVER_START_TIMESTAMP:-}
{% if dev_mode %}
{% for line in dev_env_lines %}
{{ line }}
{% endfor %}
{% endif %}
UCX_LOG_LEVEL: info #debug
UCX_RNDV_PIPELINE_ERROR_HANDLING: y
UCX_TLS: tcp,cuda_copy,cuda_ipc
Expand Down Expand Up @@ -132,17 +200,24 @@ services:
CUDA_VISIBLE_DEVICES: 0
{%- endif %}
volumes:
# required for numa
- /sys/devices/system/node:/sys/devices/system/node
{%- if workers %}
# Mount all etc directories for workers {{ workers|join(', ') }}
{%- for gpu_id in workers %}
- ../../config/generated/gpu/etc_common:/opt/presto-server/etc{{ gpu_id }}
- {{ config_root }}/etc_common:/opt/presto-server/etc{{ gpu_id }}
{%- endfor %}
{%- for gpu_id in workers %}
# worker {{ gpu_id }} configs
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc{{ gpu_id }}/node.properties
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc{{ gpu_id }}/config.properties
- ../../config/generated/gpu/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc{{ gpu_id }}/catalog/hive.properties
- {{ config_root }}/etc_worker_{{ gpu_id }}/node.properties:/opt/presto-server/etc{{ gpu_id }}/node.properties
- {{ config_root }}/etc_worker_{{ gpu_id }}/config_native.properties:/opt/presto-server/etc{{ gpu_id }}/config.properties
- {{ config_root }}/etc_worker_{{ gpu_id }}/catalog/hive.properties:/opt/presto-server/etc{{ gpu_id }}/catalog/hive.properties
{%- endfor %}
{% if dev_mode %}
{% for line in dev_source_mount_lines %}
{{ line }}
{% endfor %}
{% endif %}
{%- endif %}
{%- endif %}
{% if sccache %}
Expand Down
152 changes: 152 additions & 0 deletions presto/docker/launch_presto_server_dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/bin/bash

set -euo pipefail

# Base flags for configuring/building presto-native-execution (including Velox).
# To avoid duplicating this long string across host + container scripts, host-side
# scripts should *append* via PRESTO_EXTRA_CMAKE_FLAGS_APPEND.
DEFAULT_PRESTO_EXTRA_CMAKE_FLAGS="-DPRESTO_ENABLE_TESTING=OFF -DPRESTO_ENABLE_PARQUET=ON -DPRESTO_ENABLE_CUDF=ON -DVELOX_BUILD_TESTING=OFF -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -DCMAKE_EXPORT_COMPILE_COMMANDS=ON"
# Setting PRESTO_EXTRA_CMAKE_FLAGS replaces the defaults entirely; the *_APPEND
# variable is concatenated after them at build time.
PRESTO_EXTRA_CMAKE_FLAGS=${PRESTO_EXTRA_CMAKE_FLAGS:-"${DEFAULT_PRESTO_EXTRA_CMAKE_FLAGS}"}
PRESTO_EXTRA_CMAKE_FLAGS_APPEND=${PRESTO_EXTRA_CMAKE_FLAGS_APPEND:-""}

# Build output location: ${PRESTO_BUILD_BASE}/${PRESTO_BUILD_DIR_NAME}.
PRESTO_BUILD_BASE=/workspace/build
PRESTO_BUILD_TYPE=${PRESTO_BUILD_TYPE:-RelWithDebInfo}
PRESTO_BUILD_DIR_NAME=${PRESTO_BUILD_DIR_NAME:-relwithdebinfo}

# Behaviour switches; each is compared against the string "1" later in the script.
PRESTO_REBUILD=${PRESTO_REBUILD:-0}
PRESTO_FORCE_REBUILD=${PRESTO_FORCE_REBUILD:-0}
PRESTO_SKIP_SERVER=${PRESTO_SKIP_SERVER:-0}

# Build parallelism. Honour PRESTO_NUM_THREADS first; fall back to the
# NUM_THREADS name for backward compatibility, then to 12.
PRESTO_NUM_THREADS=${PRESTO_NUM_THREADS:-${NUM_THREADS:-12}}

# In-container locations of the source tree and the server configuration directory.
PRESTO_SRC=/workspace/presto/presto-native-execution
PRESTO_ETC_DIR=/opt/presto-server/etc

# Path of the presto_server binary inside the selected build directory.
PRESTO_BIN="${PRESTO_BUILD_BASE}/${PRESTO_BUILD_DIR_NAME}/presto_cpp/main/presto_server"

function log() {
  # Write all arguments, joined by spaces, to stdout behind the
  # "[presto-dev]" tag so launcher output is easy to spot in container logs.
  printf '%s\n' "[presto-dev] $*"
}

function ensure_dir() {
  # Create the directory given as $1, including any missing parents.
  # Succeeds without change when the directory already exists.
  local wanted_dir="$1"
  mkdir -p "$wanted_dir"
}

# Make sure the home directory and the ccache / clangd cache directories
# beneath it exist before any build step runs.
for dev_dir in "$HOME" "$HOME/.ccache" "$HOME/.cache/clangd"; do
  ensure_dir "$dev_dir"
done
unset dev_dir

# Source the gcc-toolset-14 enable script when it is installed.
# shellcheck disable=SC1091
[[ ! -f /opt/rh/gcc-toolset-14/enable ]] || source /opt/rh/gcc-toolset-14/enable

function resolve_cuda_archs() {
  # Print the CUDA architecture value to build for, on a single line:
  #   1. $CUDA_ARCHITECTURES when it is set and non-empty;
  #   2. otherwise the compute capability of the first GPU listed by
  #      nvidia-smi, with the dot removed (e.g. "8.6" -> "86");
  #   3. otherwise an empty line.
  local archs="${CUDA_ARCHITECTURES:-}"
  if [[ -z "$archs" ]] && command -v nvidia-smi >/dev/null 2>&1; then
    archs=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | head -n 1 | sed 's/\.//g')
  fi
  echo "$archs"
}

function build_presto_if_needed() {
  # Build presto_server into ${PRESTO_BUILD_BASE}/${PRESTO_BUILD_DIR_NAME}.
  #
  # - PRESTO_FORCE_REBUILD=1 wipes the build directory first.
  # - When the binary is already executable and neither rebuild switch is
  #   set, returns 0 without building.
  # - PRESTO_REBUILD=1 with an existing CMakeCache.txt runs ninja only;
  #   every other case runs `make cmake-and-build` from the source tree.
  #
  # Returns the exit status of the build command that was run.
  local out_dir="${PRESTO_BUILD_BASE}/${PRESTO_BUILD_DIR_NAME}"

  ensure_dir "$PRESTO_BUILD_BASE"
  if [[ "$PRESTO_FORCE_REBUILD" == "1" ]]; then
    # The :? guard aborts instead of expanding to "/<dir>" if the base is empty.
    rm -rf "${PRESTO_BUILD_BASE:?}/${PRESTO_BUILD_DIR_NAME}"
  fi

  # Fast path: an existing binary is reused unless a rebuild was requested.
  if [[ "$PRESTO_FORCE_REBUILD" != "1" && "$PRESTO_REBUILD" != "1" && -x "$PRESTO_BIN" ]]; then
    return 0
  fi

  # Assemble the CMake flags: base flags, appended flags, then the CUDA archs.
  local cmake_flags="${PRESTO_EXTRA_CMAKE_FLAGS} ${PRESTO_EXTRA_CMAKE_FLAGS_APPEND}"
  local detected_archs
  detected_archs=$(resolve_cuda_archs)
  [[ -z "$detected_archs" ]] || cmake_flags+=" -DCMAKE_CUDA_ARCHITECTURES=${detected_archs}"

  # Optional local cuDF checkout override (in-container path).
  # When set, force Velox to build cuDF from source and point FetchContent at
  # the local checkout, unless the caller already supplied these flags.
  if [[ -n "${PRESTO_CUDF_DIR:-}" && -d "${PRESTO_CUDF_DIR}" ]]; then
    case "$cmake_flags" in
      *"FETCHCONTENT_SOURCE_DIR_CUDF="*) ;;
      *) cmake_flags+=" -DFETCHCONTENT_SOURCE_DIR_CUDF=${PRESTO_CUDF_DIR}" ;;
    esac
    case "$cmake_flags" in
      *"-Dcudf_SOURCE="*) ;;
      *) cmake_flags+=" -Dcudf_SOURCE=BUNDLED" ;;
    esac
  fi

  if [[ "$PRESTO_REBUILD" == "1" && -f "${out_dir}/CMakeCache.txt" ]]; then
    log "Incremental build (no reconfigure) (${PRESTO_BUILD_TYPE}/${PRESTO_BUILD_DIR_NAME})"
    (cd "$out_dir" && ninja -j "$PRESTO_NUM_THREADS" presto_server)
  else
    log "Configuring + building (${PRESTO_BUILD_TYPE}/${PRESTO_BUILD_DIR_NAME})"
    (cd "$PRESTO_SRC" && \
      make cmake-and-build \
        BUILD_BASE_DIR="$PRESTO_BUILD_BASE" \
        BUILD_DIR="$PRESTO_BUILD_DIR_NAME" \
        BUILD_TYPE="$PRESTO_BUILD_TYPE" \
        NUM_THREADS="$PRESTO_NUM_THREADS" \
        EXTRA_CMAKE_FLAGS="$cmake_flags")
  fi
}

# The source tree must be present in the container; abort early otherwise.
[[ -d "$PRESTO_SRC" ]] || {
  log "ERROR: PRESTO_SRC directory '$PRESTO_SRC' not found."
  exit 1
}

# A failed build does not stop the container: the script falls back to the
# idle (skip-server) mode so the build can be fixed interactively.
build_presto_if_needed || {
  log "Container is running without presto_server. Attach with 'docker exec -it $HOSTNAME bash' to build manually."
  PRESTO_SKIP_SERVER=1
}

# Build the optional profiler prefix that is placed in front of presto_server.
# PROFILE and PROFILE_ARGS are read with ":-" defaults so that, under
# `set -u`, the script still works when they are not exported at all
# (for example when the image is run with its default CMD).
if [[ "${PROFILE:-}" == "ON" ]]; then
  ensure_dir /presto_profiles
  PROFILE_ARGS=${PROFILE_ARGS:-}
  if [[ -z "$PROFILE_ARGS" ]]; then
    # Default nsys options used when the caller supplies none.
    PROFILE_ARGS="-t nvtx,cuda,osrt \
--cuda-memory-usage=true \
--cuda-um-cpu-page-faults=true \
--cuda-um-gpu-page-faults=true \
--cudabacktrace=true"
  fi
  # PROFILE_ARGS is deliberately unquoted so it splits into separate arguments.
  # shellcheck disable=SC2206
  PROFILE_CMD=(nsys launch $PROFILE_ARGS)
else
  # Profiling disabled: an empty array expands to nothing in front of the binary.
  PROFILE_CMD=()
fi

# Fix for libboost_iostreams.so.1.84.0 not found: refresh the dynamic linker
# cache for /usr/local/lib. As a non-root user this is best-effort only, so
# its output is discarded and a failure is ignored; as root a failure is fatal.
if (( $(id -u) != 0 )); then
  ldconfig /usr/local/lib >/dev/null 2>&1 || true
else
  ldconfig /usr/local/lib
fi

# Final stage: either idle for interactive development or start presto_server.
if [[ "$PRESTO_SKIP_SERVER" == "1" ]]; then
  log "PRESTO_SKIP_SERVER=1 - container is idling for interactive development."
  # Block forever so the container stays up for `docker exec`.
  tail -f /dev/null
else
  if [[ ! -x "$PRESTO_BIN" ]]; then
    log "ERROR: presto_server binary missing at ${PRESTO_BIN}"
    exit 1
  fi
  if [[ $# -gt 0 ]]; then
    # Launch multiple presto servers in parallel, each pinned to a different GPU.
    # The GPU IDs are passed as command-line arguments and the template mounts
    # per-GPU etc dirs at /opt/presto-server/etc<gpu_id>.
    log "Starting presto_server(s) from ${PRESTO_BIN} for GPU IDs: $*"
    server_pids=()
    for gpu_id in "$@"; do
      CUDA_VISIBLE_DEVICES=$gpu_id "${PROFILE_CMD[@]}" "$PRESTO_BIN" --etc-dir="${PRESTO_ETC_DIR}${gpu_id}" &
      server_pids+=("$!")
    done
    # A bare `wait` always returns 0 and would hide a crashed server. Wait on
    # each PID instead and exit non-zero if any server failed. The `if !`
    # form keeps `set -e` from aborting before the remaining servers are reaped.
    servers_status=0
    for server_pid in "${server_pids[@]}"; do
      if ! wait "$server_pid"; then
        log "ERROR: presto_server process ${server_pid} exited with a non-zero status."
        servers_status=1
      fi
    done
    exit "$servers_status"
  else
    log "Starting presto_server from ${PRESTO_BIN}"
    # exec replaces this shell so the server receives container signals directly.
    exec "${PROFILE_CMD[@]}" "$PRESTO_BIN" --etc-dir="$PRESTO_ETC_DIR"
  fi
fi

34 changes: 34 additions & 0 deletions presto/docker/native_dev_build.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Development image: adds profiling/debugging tools and the dev launch script
# on top of the Presto native dependency image. Sources and build output are
# expected under /workspace.
ARG BASE_IMAGE=presto/prestissimo-dependency:centos9
FROM ${BASE_IMAGE}

# Version of the nsight-systems-cli package to install.
ARG NSIGHT_VERSION=2025.5.1

# Default CUDA architecture list, exported to the container environment below.
ARG CUDA_ARCHITECTURES="75;80;86;90;100;120"

ENV CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES}
ENV PRESTO_SRC_ROOT=/workspace/presto/presto-native-execution
ENV VELOX_SRC_ROOT=/workspace/presto/presto-native-execution/velox
ENV PRESTO_DEV_BUILD_BASE=/workspace/build

# Install the Nsight Systems CLI from the NVIDIA devtools repository matching
# the OS major version and CPU architecture. The package cache is cleaned in
# the same layer so it is not baked into the image.
RUN rpm --import https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub && \
    dnf config-manager --add-repo "https://developer.download.nvidia.com/devtools/repos/rhel$(source /etc/os-release; echo ${VERSION_ID%%.*})/$(rpm --eval '%{_arch}' | sed s/aarch/arm/)/" && \
    dnf install -y nsight-systems-cli-${NSIGHT_VERSION} && \
    dnf clean all

# Debugger, compiler toolset and bzip2 headers.
RUN dnf install -y \
        bzip2-devel \
        gcc-toolset-14 \
        gdb \
    && dnf clean all

# Put gcc-toolset-14 first on the tool and library search paths.
ENV GCC_TOOLSET_ROOT=/opt/rh/gcc-toolset-14/root
ENV PATH=${GCC_TOOLSET_ROOT}/bin:${PATH}
# Append the previous LD_LIBRARY_PATH only when it is non-empty; a trailing ':'
# would add the current working directory to the loader search path.
ENV LD_LIBRARY_PATH=${GCC_TOOLSET_ROOT}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
ENV CC=${GCC_TOOLSET_ROOT}/bin/gcc
ENV CXX=${GCC_TOOLSET_ROOT}/bin/g++

# Home directory lives under /workspace.
ENV HOME=/workspace/home
RUN mkdir -p "${HOME}"

# The COPY source is relative to the build context root, not to this file.
COPY velox-testing/presto/docker/launch_presto_server_dev.sh /opt/launch_presto_server_dev.sh
RUN chmod +x /opt/launch_presto_server_dev.sh

WORKDIR /workspace

CMD ["bash", "/opt/launch_presto_server_dev.sh"]

Loading