Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .github/workflows/test-llm-d-patch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ jobs:
git config --global user.email "foo@bar.com"
git config --global user.name "Spnl Dev"

- name: Generate patch
working-directory: docker/vllm/llm-d
run: ./genpatch.sh
# Disabled for now: producing the llm-d 0.5.0 patch required AI-assisted conflict resolution, so genpatch.sh no longer rebases cleanly. Re-enable this step once our vLLM span-query branch has been rebased so that genpatch runs without conflicts.
#- name: Generate patch
# working-directory: docker/vllm/llm-d
# run: ./genpatch.sh

- name: Apply patch
working-directory: docker/vllm/llm-d
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/vllm-gce.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ jobs:
name: Test in GCE VM
env:
# Adjust these as needed
VLLM_ORG: neuralmagic
VLLM_ORG: vllm-project
VLLM_REPO: vllm
VLLM_BRANCH: llm-d-release-0.4
VLLM_SHA: d7de043d55d1dd629554467e23874097e1c48993
MODEL: ibm-granite/granite-3.3-2b-instruct

# You probably won't need to change this
Expand Down
2 changes: 1 addition & 1 deletion cli/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ pub enum ImageCommands {
image_family: String,

/// LLM-D version for patch file
#[arg(long, default_value = "0.4.0")]
#[arg(long, default_value = "0.5.0")]
llmd_version: String,

/// GCE configuration
Expand Down
2 changes: 1 addition & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ async fn main() -> Result<(), SpnlError> {
.llmd_version(llmd_version.clone())
.vllm_org(gce_config.vllm_org.clone())
.vllm_repo(gce_config.vllm_repo.clone())
.vllm_branch(gce_config.vllm_branch.clone())
.vllm_sha(gce_config.vllm_sha.clone())
.config(gce_config.clone())
.build()?,
)
Expand Down
3 changes: 2 additions & 1 deletion docker/gce/vllm/cloud-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ write_files:
SPNL_RELEASE=${spnl_release}
VLLM_ORG=${vllm_org}
VLLM_REPO=${vllm_repo}
VLLM_BRANCH=${vllm_branch}
VLLM_SHA=${vllm_sha}
VLLM_PRECOMPILED_WHEEL_COMMIT=${vllm_precompiled_wheel_commit}
MODEL=${model}
VLLM_PATCHFILE=/tmp/vllm.patch
${vllm_config_section}
Expand Down
2 changes: 1 addition & 1 deletion docker/gce/vllm/create-vllm-gce-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#
# Create a custom GCE image with vLLM pre-installed
# This script creates a reusable image based on the setup.sh logic
# This script creates a reusable image
#

set -euo pipefail
Expand Down
44 changes: 42 additions & 2 deletions docker/gce/vllm/setup-dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,51 @@ fi
# Install vLLM
curl -LsSf https://astral.sh/uv/install.sh | sh
source $HOME/.local/bin/env
git clone https://github.com/$VLLM_ORG/$VLLM_REPO.git vllm -b $VLLM_BRANCH
git clone https://github.com/$VLLM_ORG/$VLLM_REPO.git vllm
cd vllm
git fetch origin $VLLM_SHA
git checkout $VLLM_SHA
uv venv --seed
source .venv/bin/activate
VLLM_USE_PRECOMPILED=1 uv pip install --editable .

# Choose which commit's precompiled wheel to pair with the checked-out source.
# Defaults to VLLM_SHA; set VLLM_PRECOMPILED_WHEEL_COMMIT to borrow binaries from
# a different commit (e.g. one on main) while still building the VLLM_SHA sources.
VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_PRECOMPILED_WHEEL_COMMIT:-${VLLM_SHA}}"

# Map the machine architecture to the platform tag that appears in wheel
# filenames (following the llm-d approach). An empty tag means we will not
# look for a precompiled wheel and will compile from source instead.
MACHINE=$(uname -m)
case "${MACHINE}" in
    x86_64|amd64)  PLATFORM_TAG="manylinux_2_31_x86_64" ;;
    aarch64|arm64) PLATFORM_TAG="manylinux_2_31_aarch64" ;;
    *)             echo "Unsupported architecture: ${MACHINE}"; PLATFORM_TAG="" ;;
esac

# Probe the wheel index for this commit. WHEEL_URL stays empty unless a wheel
# matching PLATFORM_TAG is listed there.
WHEEL_URL=""
if [ -n "${PLATFORM_TAG}" ]; then
    echo "Looking for precompiled wheel at: https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/"
    # A failed download (missing index, network error) is not fatal: it just
    # leaves the index empty so that we fall through to the source build.
    WHEEL_INDEX_HTML=$(curl -sf "https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/" 2>/dev/null || true)
    if [ -n "${WHEEL_INDEX_HTML}" ]; then
        # `|| true` keeps a no-match grep (exit 1), or grep being cut short when
        # head exits early, from aborting the script if it runs with
        # `set -e -o pipefail`; an empty result simply selects the source build.
        WHEEL_FILENAME=$(printf '%s\n' "${WHEEL_INDEX_HTML}" | grep -oE "vllm-[^\"]+${PLATFORM_TAG}\.whl" | head -1 || true)
        if [ -n "${WHEEL_FILENAME}" ]; then
            WHEEL_URL="https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/${WHEEL_FILENAME}"
            # URL-encode the + sign in the wheel filename
            WHEEL_URL=$(printf '%s\n' "${WHEEL_URL}" | sed -E 's/\+/%2B/g')
            echo "Found precompiled wheel: ${WHEEL_URL}"
        fi
    fi
fi

# Configure the build environment, then install. The install command is the
# same either way; only the VLLM_* environment variables differ.
if [ -n "${WHEEL_URL}" ]; then
    echo "Using precompiled binaries from commit: ${VLLM_PRECOMPILED_WHEEL_COMMIT} (source: ${VLLM_SHA})"
    export VLLM_USE_PRECOMPILED=1
    export VLLM_PRECOMPILED_WHEEL_LOCATION="${WHEEL_URL}"
else
    echo "Compiling vLLM from source (no precompiled wheel found or unsupported platform)"
    # Make sure values inherited from the environment cannot force the precompiled path.
    unset VLLM_USE_PRECOMPILED VLLM_PRECOMPILED_WHEEL_LOCATION 2>/dev/null || true
fi
uv pip install --editable .

# Wait for spnl build to complete
echo "Waiting for spnl build to complete..."
Expand Down
207 changes: 0 additions & 207 deletions docker/gce/vllm/setup.sh

This file was deleted.

4 changes: 2 additions & 2 deletions docker/vllm/llm-d/Containerfile.cuda
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG LLMD_VERSION=0.4.0
ARG LLMD_VERSION=0.5.0
ARG MANYLINUX_VERSION=2_34 # use 2_39 for aarch64; TODO is it possible to infer this from the build platform?

# Python version extractor
Expand Down Expand Up @@ -29,7 +29,7 @@ LABEL org.opencontainers.image.source=https://github.com/IBM/spnl
LABEL org.opencontainers.image.description="Span Query support for llm-d's vLLM"
LABEL org.opencontainers.image.licenses="Apache-2.0"

ARG LLMD_VERSION=0.4.0 # sigh, we need to repeat this if we want to use it inside of the FROM
ARG LLMD_VERSION=0.5.0 # sigh, we need to repeat this if we want to use it inside of the FROM

COPY --from=builder target/wheels/ /tmp/wheels
COPY docker/vllm/llm-d/patches/$LLMD_VERSION/ /tmp/patches
Expand Down
10 changes: 6 additions & 4 deletions docker/vllm/llm-d/clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

set -e

LLMD_VERSION=0.4.0
BASE_VLLM_FORK=https://github.com/neuralmagic/vllm.git
BASE_VLLM_BRANCH=llm-d-release-0.4
LLMD_VERSION=0.5.0
BASE_VLLM_FORK=https://github.com/vllm-project/vllm.git
BASE_VLLM_COMMIT_SHA=d7de043d55d1dd629554467e23874097e1c48993

git clone $BASE_VLLM_FORK -b $BASE_VLLM_BRANCH --depth 1
git clone $BASE_VLLM_FORK vllm
cd vllm
git fetch --depth=1 origin $BASE_VLLM_COMMIT_SHA
git checkout -q $BASE_VLLM_COMMIT_SHA

for patchfile in ../patches/$LLMD_VERSION/*.patch.gz
do git apply <(gunzip -c $patchfile) --reject
Expand Down
23 changes: 11 additions & 12 deletions docker/vllm/llm-d/genpatch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,24 @@ SCRIPTDIR=$(cd $(dirname "$0") && pwd)
SPANS_VLLM_FORK=https://github.com/starpit/vllm-ibm.git
SPANS_VLLM_BRANCH=spnl-ibm

LLMD_VERSION=0.4.0
BASE_VLLM_FORK=https://github.com/neuralmagic/vllm.git
BASE_VLLM_BRANCH=llm-d-release-0.4
LLMD_VERSION=0.5.0
BASE_VLLM_FORK=https://github.com/vllm-project/vllm.git
BASE_VLLM_COMMIT_SHA=d7de043d55d1dd629554467e23874097e1c48993

T=$(mktemp -d)
trap "rm -rf $T" EXIT
T=vllm
#trap "rm -rf $T" EXIT

git clone $BASE_VLLM_FORK $T/vllm-llmd -b $BASE_VLLM_BRANCH
git clone $BASE_VLLM_FORK $T/vllm-llmd
cd $T/vllm-llmd
BASE_VLLM_REVISION=$(git rev-parse --verify HEAD)
git fetch origin $BASE_VLLM_COMMIT_SHA
git checkout -q $BASE_VLLM_COMMIT_SHA
BASE_VLLM_REVISION=$BASE_VLLM_COMMIT_SHA

git remote add spans $SPANS_VLLM_FORK
git fetch spans $SPANS_VLLM_BRANCH
git checkout $SPANS_VLLM_BRANCH
SPANS_VLLM_REVISION=$(git rev-parse --verify HEAD)

git checkout $BASE_VLLM_BRANCH
git rebase spans/$SPANS_VLLM_BRANCH -C0
git rebase spans/$SPANS_VLLM_BRANCH -C0

# Note: gzip --no-name keeps the output deterministic (the original file name and mtime are not stored in the gzip header), so regenerating an unchanged patch yields a byte-identical file and no spurious git diff
mkdir -p "$SCRIPTDIR"/patches
mkdir -p "$SCRIPTDIR"/patches/$LLMD_VERSION
git diff $BASE_VLLM_REVISION | gzip --no-name -c > "$SCRIPTDIR"/patches/$LLMD_VERSION/01-spans-llmd-vllm.patch.gz
Binary file not shown.
Loading
Loading