diff --git a/.github/workflows/test-llm-d-patch.yml b/.github/workflows/test-llm-d-patch.yml
index 01170d9a..b730c35f 100644
--- a/.github/workflows/test-llm-d-patch.yml
+++ b/.github/workflows/test-llm-d-patch.yml
@@ -31,9 +31,10 @@ jobs:
           git config --global user.email "foo@bar.com"
           git config --global user.name "Spnl Dev"
 
-      - name: Generate patch
-        working-directory: docker/vllm/llm-d
-        run: ./genpatch.sh
+      # The llm-d 0.5.0 patch needed AI-assisted manual rebasing. Re-enable this step once our vllm span-query branch is rebased so that genpatch no longer hits conflicts.
+      #- name: Generate patch
+      #  working-directory: docker/vllm/llm-d
+      #  run: ./genpatch.sh
         
       - name: Apply patch
         working-directory: docker/vllm/llm-d
diff --git a/.github/workflows/vllm-gce.yml b/.github/workflows/vllm-gce.yml
index 280ec115..4f3e87b7 100644
--- a/.github/workflows/vllm-gce.yml
+++ b/.github/workflows/vllm-gce.yml
@@ -20,9 +20,9 @@ jobs:
     name: Test in GCE VM
     env:
       # Adjust these as needed
-      VLLM_ORG: neuralmagic
+      VLLM_ORG: vllm-project
       VLLM_REPO: vllm
-      VLLM_BRANCH: llm-d-release-0.4
+      VLLM_SHA: d7de043d55d1dd629554467e23874097e1c48993
       MODEL: ibm-granite/granite-3.3-2b-instruct
 
       # You probably won't need to change this
diff --git a/cli/src/args.rs b/cli/src/args.rs
index 5648b55a..69b23fb7 100644
--- a/cli/src/args.rs
+++ b/cli/src/args.rs
@@ -226,7 +226,7 @@ pub enum ImageCommands {
         image_family: String,
 
         /// LLM-D version for patch file
-        #[arg(long, default_value = "0.4.0")]
+        #[arg(long, default_value = "0.5.0")]
         llmd_version: String,
 
         /// GCE configuration
diff --git a/cli/src/main.rs b/cli/src/main.rs
index c2a0da6c..4c154cd7 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -114,7 +114,7 @@ async fn main() -> Result<(), SpnlError> {
                                 .llmd_version(llmd_version.clone())
                                 .vllm_org(gce_config.vllm_org.clone())
                                 .vllm_repo(gce_config.vllm_repo.clone())
-                                .vllm_branch(gce_config.vllm_branch.clone())
+                                .vllm_sha(gce_config.vllm_sha.clone())
                                 .config(gce_config.clone())
                                 .build()?,
                         )
diff --git a/docker/gce/vllm/cloud-config.yaml b/docker/gce/vllm/cloud-config.yaml
index fe1db798..9e37f038 100644
--- a/docker/gce/vllm/cloud-config.yaml
+++ b/docker/gce/vllm/cloud-config.yaml
@@ -23,7 +23,8 @@ write_files:
       SPNL_RELEASE=${spnl_release}
       VLLM_ORG=${vllm_org}
       VLLM_REPO=${vllm_repo}
-      VLLM_BRANCH=${vllm_branch}
+      VLLM_SHA=${vllm_sha}
+      VLLM_PRECOMPILED_WHEEL_COMMIT=${vllm_precompiled_wheel_commit}
       MODEL=${model}
       VLLM_PATCHFILE=/tmp/vllm.patch
 ${vllm_config_section}
diff --git a/docker/gce/vllm/create-vllm-gce-image.sh b/docker/gce/vllm/create-vllm-gce-image.sh
index c332cfe2..8da56de4 100755
--- a/docker/gce/vllm/create-vllm-gce-image.sh
+++ b/docker/gce/vllm/create-vllm-gce-image.sh
@@ -2,7 +2,7 @@
 
 #
 # Create a custom GCE image with vLLM pre-installed
-# This script creates a reusable image based on the setup.sh logic
+# This script creates a reusable image
 #
 
 set -euo pipefail
diff --git a/docker/gce/vllm/setup-dev.sh b/docker/gce/vllm/setup-dev.sh
index 8410317b..742b9feb 100644
--- a/docker/gce/vllm/setup-dev.sh
+++ b/docker/gce/vllm/setup-dev.sh
@@ -69,11 +69,51 @@ fi
 # Install vLLM
 curl -LsSf https://astral.sh/uv/install.sh | sh
 source $HOME/.local/bin/env
-git clone https://github.com/$VLLM_ORG/$VLLM_REPO.git vllm -b $VLLM_BRANCH
+git clone https://github.com/$VLLM_ORG/$VLLM_REPO.git vllm
 cd vllm
+git fetch origin $VLLM_SHA
+git checkout $VLLM_SHA
 uv venv --seed
 source .venv/bin/activate
-VLLM_USE_PRECOMPILED=1 uv pip install --editable .
+
+# Default VLLM_PRECOMPILED_WHEEL_COMMIT to VLLM_SHA if not set
+# This allows using precompiled binaries from a different commit (e.g., main) while checking out a specific source commit
+VLLM_PRECOMPILED_WHEEL_COMMIT="${VLLM_PRECOMPILED_WHEEL_COMMIT:-${VLLM_SHA}}"
+
+# Detect if precompiled wheel exists (following llm-d approach)
+MACHINE=$(uname -m)
+case "${MACHINE}" in
+  x86_64|amd64) PLATFORM_TAG="manylinux_2_31_x86_64" ;;
+  aarch64|arm64) PLATFORM_TAG="manylinux_2_31_aarch64" ;;
+  *) echo "Unsupported architecture: ${MACHINE}"; PLATFORM_TAG="" ;;
+esac
+
+WHEEL_URL=""
+if [ -n "${PLATFORM_TAG}" ]; then
+  echo "Looking for precompiled wheel at: https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/"
+  WHEEL_INDEX_HTML=$(curl -sf "https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/vllm/" 2>/dev/null || echo "")
+  if [ -n "${WHEEL_INDEX_HTML}" ]; then
+    WHEEL_FILENAME=$(echo "${WHEEL_INDEX_HTML}" | grep -oE "vllm-[^\"]+${PLATFORM_TAG}\.whl" | head -1 || true)  # grep exits 1 on no match; do not let errexit/pipefail abort the source-build fallback
+    if [ -n "${WHEEL_FILENAME}" ]; then
+      # URL-encode the + sign in the wheel filename
+      WHEEL_URL="https://wheels.vllm.ai/${VLLM_PRECOMPILED_WHEEL_COMMIT}/${WHEEL_FILENAME}"
+      WHEEL_URL=$(echo "${WHEEL_URL}" | sed -E 's/\+/%2B/g')
+      echo "Found precompiled wheel: ${WHEEL_URL}"
+    fi
+  fi
+fi
+
+# Install vLLM with or without precompiled binaries
+if [ -n "${WHEEL_URL}" ]; then
+  echo "Using precompiled binaries from commit: ${VLLM_PRECOMPILED_WHEEL_COMMIT} (source: ${VLLM_SHA})"
+  export VLLM_USE_PRECOMPILED=1
+  export VLLM_PRECOMPILED_WHEEL_LOCATION="${WHEEL_URL}"
+  uv pip install --editable .
+else
+  echo "Compiling vLLM from source (no precompiled wheel found or unsupported platform)"
+  unset VLLM_USE_PRECOMPILED VLLM_PRECOMPILED_WHEEL_LOCATION 2>/dev/null || true
+  uv pip install --editable .
+fi
 
 # Wait for spnl build to complete
 echo "Waiting for spnl build to complete..."
diff --git a/docker/gce/vllm/setup.sh b/docker/gce/vllm/setup.sh
deleted file mode 100644
index 6ce3a520..00000000
--- a/docker/gce/vllm/setup.sh
+++ /dev/null
@@ -1,207 +0,0 @@
-#!/usr/bin/env bash
-
-#
-# Note: this script is executed inside the VM, via a cloud-init runcmd. See ./cloud-config.yaml
-#
-
-set -eo pipefail
-
-# DEBUG
-#set -x
-
-cleanup() {
-    rc=$?
-    echo "Exiting with exit_code=$rc"
-    gsutil cp <(echo $rc) gs://$GCS_BUCKET/runs/$RUN_ID/status/exit_code
-}
-trap "cleanup" EXIT
-
-export HOME=/root
-cd $HOME
-
-# TODO: i was expecting this to be loaded automatically. Apparently not if this is run via a cloud-init runcmd.
-. /etc/environment
-
-export SCCACHE_GCS_BUCKET=$GCS_BUCKET
-SCCACHE_VERSION=$(curl -s "https://api.github.com/repos/mozilla/sccache/releases/latest" | grep -Po '"tag_name": "v\K[0-9.]+')
-wget -qO sccache.tar.gz https://github.com/mozilla/sccache/releases/latest/download/sccache-v$SCCACHE_VERSION-x86_64-unknown-linux-musl.tar.gz
-mkdir sccache-temp
-tar xf sccache.tar.gz --strip-components=1 -C sccache-temp
-sudo mv sccache-temp/sccache /usr/local/bin
-sudo chmod a+x /usr/local/bin/sccache
-rm -rf sccache.tar.gz sccache-temp
-export RUSTC_WRAPPER=/usr/local/bin/sccache
-export SCCACHE_GCS_RW_MODE=READ_WRITE
-export SCCACHE_GCS_KEY_PREFIX=sccache
-
-# Install and build spnl
-export CARGO_INCREMENTAL=0 # Disable incremental compilation for faster from-scratch builds
-export CARGO_PROFILE_TEST_DEBUG=0
-
-if [[ -n "$SPNL_RELEASE" ]]
-then
-    echo "Downloading spnl release $SPNL_RELEASE"
-
-    # Detect OS and architecture
-    OS=$(uname -s | tr '[:upper:]' '[:lower:]')
-    ARCH=$(uname -m)
-
-    # Map architecture names to match GitHub release naming
-    case "$ARCH" in
-        x86_64)
-            ARCH="x86_64"
-            ;;
-        aarch64|arm64)
-            ARCH="aarch64"
-            ;;
-        *)
-            echo "Unsupported architecture: $ARCH"
-            exit 1
-            ;;
-    esac
-
-    # Map OS and ABI to match GitHub release naming
-    # Format: spnl-{version}-{os}-{arch}-{abi}.tar.gz
-    case "$OS" in
-        linux)
-            OS="linux"
-            ABI="gnu"
-            ;;
-        darwin)
-            OS="macos"
-            ABI=""
-            ;;
-        *)
-            echo "Unsupported OS: $OS"
-            exit 1
-            ;;
-    esac
-
-    # Construct the asset name
-    if [[ -n "$ABI" ]]; then
-        ASSET_NAME="spnl-${SPNL_RELEASE}-${OS}-${ARCH}-${ABI}.tar.gz"
-    else
-        ASSET_NAME="spnl-${SPNL_RELEASE}-${OS}-${ARCH}.tar.gz"
-    fi
-
-    # Extract repo owner and name from SPNL_GITHUB (e.g., https://github.com/owner/repo)
-    REPO_PATH=$(echo "$SPNL_GITHUB" | sed -E 's|https?://github.com/||' | sed 's|\.git$||')
-
-    # Download the release asset
-    DOWNLOAD_URL="https://github.com/${REPO_PATH}/releases/download/${SPNL_RELEASE}/${ASSET_NAME}"
-    echo "Downloading from: $DOWNLOAD_URL"
-
-    wget -q "$DOWNLOAD_URL" -O spnl-release.tar.gz || {
-        echo "Failed to download release asset. Falling back to building from source."
-        exit 1
-    }
-
-    # Extract and install
-    tar xzf spnl-release.tar.gz
-    sudo cp spnl /usr/local/bin/
-    sudo chmod a+rX /usr/local/bin/spnl
-    rm spnl-release.tar.gz spnl
-
-    # No need to clone repo or build - we'll install Python package from PyPI later
-    spnl_pid=0
-elif [[ -n "$GITHUB_SHA" ]] && [[ -n "$GITHUB_REF" ]]
-then
-    echo "Cloning spnl from GITHUB_SHA=$GITHUB_SHA GITHUB_REF=$GITHUB_REF"
-    (
-        curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
-            source $HOME/.cargo/env && \
-            mkdir spnl && \
-            cd spnl && \
-            git init && \
-            git remote add origin $SPNL_GITHUB && \
-            git fetch --prune --no-recurse-submodules --depth=1 origin +$GITHUB_SHA:$GITHUB_REF && \
-            git checkout --progress --force $GITHUB_REF && \
-            cargo build -F rag,spnl-api,vllm && sudo cp target/debug/spnl /usr/local/bin && sudo chmod a+rX /usr/local/bin/spnl \
-            ) &
-    spnl_pid=$!
-else
-    echo "Cloning spnl from repo=$SPNL_GITHUB"
-    (
-        curl --proto "=https" --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
-            source $HOME/.cargo/env && \
-            git clone $SPNL_GITHUB spnl && \
-            cd spnl && \
-            cargo build -F rag,spnl-api,vllm && sudo cp target/debug/spnl /usr/local/bin && sudo chmod a+rX /usr/local/bin/spnl \
-            ) &
-    spnl_pid=$!
-fi
-
-# Install vLLM
-curl -LsSf https://astral.sh/uv/install.sh | sh
-source $HOME/.local/bin/env
-git clone https://github.com/$VLLM_ORG/$VLLM_REPO.git vllm -b $VLLM_BRANCH
-cd vllm
-uv venv --seed
-source .venv/bin/activate
-VLLM_USE_PRECOMPILED=1 uv pip install --editable .
-
-# Wait for spnl build to complete (if building from source)
-if [[ $spnl_pid -ne 0 ]]; then
-    echo "Waiting for spnl build to complete..."
-    wait $spnl_pid
-    echo "spnl build completed"
-fi
-
-# Patch the vllm code and install spnl Python package
-spnl vllm patchfile | git apply
-
-if [[ -n "$SPNL_RELEASE" ]]
-then
-    # Install spnl from PyPI (strip 'v' prefix if present)
-    SPNL_VERSION="${SPNL_RELEASE#v}"
-    echo "Installing spnl==$SPNL_VERSION from PyPI"
-    uv pip install "spnl==$SPNL_VERSION"
-else
-    # Build the cloned version of spnl into vLLM, via maturin
-    uv pip install maturin[patchelf]
-    source $HOME/.cargo/env # to get rustc on path
-    (cd $HOME/spnl && maturin develop -F tok,run_py -m spnl/Cargo.toml)
-fi
-
-# Start vLLM
-VLLM_ATTENTION_BACKEND=TRITON_ATTN \
-    VLLM_USE_V1=1 \
-    VLLM_V1_SPANS_ENABLED=True \
-    VLLM_V1_SPANS_TOKEN_PLUS=10 \
-    VLLM_V1_SPANS_TOKEN_CROSS=13 \
-    VLLM_SERVER_DEV_MODE=1 \
-    nohup vllm serve $MODEL --enforce-eager &
-
-# Install ollama (for embedding)
-(curl -fsSL https://ollama.com/install.sh | sh && ollama serve) &
-
-# Wait till vllm is ready
-timeout 5m bash -c 'until curl --output /dev/null --silent --fail http://localhost:8000/health; do sleep 3; done'
-echo "vllm is ready"
-
-# Wait till ollama is ready
-#timeout 5m bash -c 'until curl --output /dev/null --silent --fail http://localhost:11434; do sleep 3; done'
-timeout 5m bash -c 'until ollama ps; do sleep 3; done'
-echo "ollama is ready"
-
-# Run tests only if not using a release (releases are for production, not testing)
-if [[ -z "$SPNL_RELEASE" ]]
-then
-    # Here are the variables we will allow to be used in the test.d/* scripts
-    declare -x GCS_BUCKET
-    declare -x RUN_ID
-    declare -x MODEL
-    declare -x OPENAI_API_BASE=http://localhost:8000/v1
-
-    cd $HOME
-    TESTS_DIR=$HOME/spnl/docker/gce/vllm/test.d
-    if [ -d "$TESTS_DIR" ]
-    then
-        n_tests=$(ls "$TESTS_DIR" | wc -l | xargs)
-        echo "Starting $n_tests tests"
-        find "$TESTS_DIR" -type f -name '*.sh' -print0 | xargs -0L1 -I{} bash -c 'echo "Executing {} at $(date -u)"; "{}"'
-    else echo "No tests found in $TESTS_DIR"
-    fi
-else
-    echo "Skipping tests (SPNL_RELEASE is set)"
-fi
diff --git a/docker/vllm/llm-d/Containerfile.cuda b/docker/vllm/llm-d/Containerfile.cuda
index 6f8f4812..438c52bb 100644
--- a/docker/vllm/llm-d/Containerfile.cuda
+++ b/docker/vllm/llm-d/Containerfile.cuda
@@ -1,4 +1,4 @@
-ARG LLMD_VERSION=0.4.0
+ARG LLMD_VERSION=0.5.0
 ARG MANYLINUX_VERSION=2_34 # use 2_39 for aarch64; TODO is it possible to infer this from the build platform?
 
 # Python version extractor
@@ -29,7 +29,7 @@ LABEL org.opencontainers.image.source=https://github.com/IBM/spnl
 LABEL org.opencontainers.image.description="Span Query support for llm-d's vLLM"
 LABEL org.opencontainers.image.licenses="Apache-2.0"
 
-ARG LLMD_VERSION=0.4.0 # sigh, we need to repeat this if we want to use it inside of the FROM
+ARG LLMD_VERSION=0.5.0 # sigh, we need to repeat this if we want to use it inside of the FROM
 
 COPY --from=builder target/wheels/ /tmp/wheels
 COPY docker/vllm/llm-d/patches/$LLMD_VERSION/ /tmp/patches
diff --git a/docker/vllm/llm-d/clone.sh b/docker/vllm/llm-d/clone.sh
index 4625c864..9bc22aa9 100755
--- a/docker/vllm/llm-d/clone.sh
+++ b/docker/vllm/llm-d/clone.sh
@@ -2,12 +2,14 @@
 
 set -e
 
-LLMD_VERSION=0.4.0
-BASE_VLLM_FORK=https://github.com/neuralmagic/vllm.git
-BASE_VLLM_BRANCH=llm-d-release-0.4
+LLMD_VERSION=0.5.0
+BASE_VLLM_FORK=https://github.com/vllm-project/vllm.git
+BASE_VLLM_COMMIT_SHA=d7de043d55d1dd629554467e23874097e1c48993
 
-git clone $BASE_VLLM_FORK -b $BASE_VLLM_BRANCH --depth 1
+git clone $BASE_VLLM_FORK vllm
 cd vllm
+git fetch --depth=1 origin $BASE_VLLM_COMMIT_SHA
+git checkout -q $BASE_VLLM_COMMIT_SHA
 
 for patchfile in ../patches/$LLMD_VERSION/*.patch.gz
 do git apply <(gunzip -c $patchfile) --reject
diff --git a/docker/vllm/llm-d/genpatch.sh b/docker/vllm/llm-d/genpatch.sh
index 74d4449d..ff9918e9 100755
--- a/docker/vllm/llm-d/genpatch.sh
+++ b/docker/vllm/llm-d/genpatch.sh
@@ -7,25 +7,24 @@ SCRIPTDIR=$(cd $(dirname "$0") && pwd)
 SPANS_VLLM_FORK=https://github.com/starpit/vllm-ibm.git
 SPANS_VLLM_BRANCH=spnl-ibm
 
-LLMD_VERSION=0.4.0
-BASE_VLLM_FORK=https://github.com/neuralmagic/vllm.git
-BASE_VLLM_BRANCH=llm-d-release-0.4
+LLMD_VERSION=0.5.0
+BASE_VLLM_FORK=https://github.com/vllm-project/vllm.git
+BASE_VLLM_COMMIT_SHA=d7de043d55d1dd629554467e23874097e1c48993
 
-T=$(mktemp -d)
-trap "rm -rf $T" EXIT
+T=vllm
+#trap "rm -rf $T" EXIT
 
-git clone $BASE_VLLM_FORK $T/vllm-llmd -b $BASE_VLLM_BRANCH
+git clone $BASE_VLLM_FORK $T/vllm-llmd
 cd $T/vllm-llmd
-BASE_VLLM_REVISION=$(git rev-parse --verify HEAD)
+git fetch origin $BASE_VLLM_COMMIT_SHA
+git checkout -q $BASE_VLLM_COMMIT_SHA
+BASE_VLLM_REVISION=$BASE_VLLM_COMMIT_SHA
 
 git remote add spans $SPANS_VLLM_FORK
 git fetch spans $SPANS_VLLM_BRANCH
-git checkout $SPANS_VLLM_BRANCH
-SPANS_VLLM_REVISION=$(git rev-parse --verify HEAD)
 
-git checkout $BASE_VLLM_BRANCH
-git rebase spans/$SPANS_VLLM_BRANCH -C0
+git rebase spans/$SPANS_VLLM_BRANCH -C0 
 
 # Notes: gzip --no-name ensures deterministic output (gzip won't save mtime in the file); this helps with git sanity
-mkdir -p "$SCRIPTDIR"/patches
+mkdir -p "$SCRIPTDIR"/patches/$LLMD_VERSION
 git diff $BASE_VLLM_REVISION | gzip --no-name -c > "$SCRIPTDIR"/patches/$LLMD_VERSION/01-spans-llmd-vllm.patch.gz
diff --git a/docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz b/docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz
new file mode 100644
index 00000000..e7fa2e27
Binary files /dev/null and b/docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz differ
diff --git a/spnl/src/vllm/gce/args.rs b/spnl/src/vllm/gce/args.rs
index 1d5ffcda..2ffc2f2e 100644
--- a/spnl/src/vllm/gce/args.rs
+++ b/spnl/src/vllm/gce/args.rs
@@ -52,9 +52,19 @@ pub struct GceConfig {
     #[arg(long, env = "VLLM_REPO", default_value = "vllm")]
     pub vllm_repo: String,
 
-    /// vLLM branch to use
-    #[arg(long, env = "VLLM_BRANCH", default_value = "llm-d-release-0.4")]
-    pub vllm_branch: String,
+    /// vLLM commit SHA to check out (use the full SHA: it is passed to `git fetch origin`)
+    #[arg(long, env = "VLLM_SHA", default_value = "d7de043d55d1dd629554467e23874097e1c48993")]
+    pub vllm_sha: String,
+
+    /// vLLM commit SHA to use for precompiled wheel lookup (default matches the vllm_sha default)
+    /// This allows using stable precompiled binaries from a known commit (e.g., main)
+    /// while checking out a different source commit for testing
+    #[arg(
+        long,
+        env = "VLLM_PRECOMPILED_WHEEL_COMMIT",
+        default_value = "d7de043d55d1dd629554467e23874097e1c48993"
+    )]
+    pub vllm_precompiled_wheel_commit: String,
 }
 
 impl GceConfig {
@@ -72,7 +82,8 @@ impl GceConfig {
             github_ref: None,
             vllm_org: "neuralmagic".to_string(),
             vllm_repo: "vllm".to_string(),
-            vllm_branch: "llm-d-release-0.4".to_string(),
+            vllm_sha: "d7de043d55d1dd629554467e23874097e1c48993".to_string(),
+            vllm_precompiled_wheel_commit: "d7de043d55d1dd629554467e23874097e1c48993".to_string(),
         }
     }
 
@@ -126,7 +137,11 @@ mod tests {
         assert_eq!(config.spnl_github, None);
         assert_eq!(config.vllm_org, "neuralmagic");
         assert_eq!(config.vllm_repo, "vllm");
-        assert_eq!(config.vllm_branch, "llm-d-release-0.4");
+        assert_eq!(config.vllm_sha, "d7de043d55d1dd629554467e23874097e1c48993");
+        assert_eq!(
+            config.vllm_precompiled_wheel_commit,
+            "d7de043d55d1dd629554467e23874097e1c48993"
+        );
     }
 
     #[test]
diff --git a/spnl/src/vllm/gce/image.rs b/spnl/src/vllm/gce/image.rs
index 8756ed01..d9a0f947 100644
--- a/spnl/src/vllm/gce/image.rs
+++ b/spnl/src/vllm/gce/image.rs
@@ -1,11 +1,11 @@
 use super::args::GceConfig;
 
 /// Generate image name from patch content hash and vLLM source identifier
-fn generate_image_name(
+pub fn generate_image_name(
     patch_content: &[u8],
     vllm_org: &str,
     vllm_repo: &str,
-    vllm_branch: &str,
+    vllm_sha: &str,
 ) -> String {
     use sha2::{Digest, Sha256};
 
@@ -13,7 +13,7 @@ fn generate_image_name(
     let patch_hash = format!("{:x}", Sha256::digest(patch_content));
 
     // Create vLLM source identifier
-    let vllm_source_id = format!("{}/{}@{}", vllm_org, vllm_repo, vllm_branch);
+    let vllm_source_id = format!("{}/{}@{}", vllm_org, vllm_repo, vllm_sha);
 
     // Combine and hash (GCE image names have 63 char limit, format is "vllm-spnl-{hash}")
     let combined = format!("{}{}", patch_hash, vllm_source_id);
@@ -41,12 +41,12 @@ pub struct ImageCreateArgs {
     #[builder(setter(into), default = "vllm".to_string())]
     pub(crate) vllm_repo: String,
 
-    /// vLLM branch to use
-    #[builder(setter(into), default = "llm-d-release-0.4".to_string())]
-    pub(crate) vllm_branch: String,
+    /// vLLM commit SHA to check out (use the full SHA: it is passed to `git fetch origin`)
+    #[builder(setter(into), default = "d7de043d55d1dd629554467e23874097e1c48993".to_string())]
+    pub(crate) vllm_sha: String,
 
     /// LLM-D version for patch file
-    #[builder(setter(into), default = "0.4.0".to_string())]
+    #[builder(setter(into), default = "0.5.0".to_string())]
     pub(crate) llmd_version: String,
 
     /// Custom image name (defaults to auto-generated from hash)
@@ -66,7 +66,7 @@ pub struct ImageCreateArgs {
 fn generate_startup_script(
     vllm_org: &str,
     vllm_repo: &str,
-    vllm_branch: &str,
+    vllm_sha: &str,
     patch_content_b64: &str,
 ) -> String {
     format!(
@@ -97,8 +97,10 @@ sudo resize2fs /dev/sda1 2>/dev/null || true
 echo "=== Installing vLLM ==="
 curl -LsSf https://astral.sh/uv/install.sh | sh
 source $HOME/.local/bin/env
-git clone https://github.com/{}/{}.git vllm -b {}
+git clone https://github.com/{}/{}.git vllm
 cd vllm
+git fetch origin {}
+git checkout {}
 
 echo "=== Applying vLLM patch ==="
 # Decode the embedded patch file
@@ -179,7 +181,7 @@ sudo rm -rf /var/lib/apt/lists/*
 
 echo "=== Image preparation complete ==="
 "#,
-        vllm_org, vllm_repo, vllm_branch, patch_content_b64
+        vllm_org, vllm_repo, vllm_sha, vllm_sha, patch_content_b64
     )
 }
 
@@ -423,7 +425,7 @@ struct ImageCreationParams<'a> {
     project: &'a str,
     vllm_org: &'a str,
     vllm_repo: &'a str,
-    vllm_branch: &'a str,
+    vllm_sha: &'a str,
     llmd_version: &'a str,
 }
 
@@ -435,8 +437,8 @@ async fn create_image_from_disk(params: ImageCreationParams<'_>) -> anyhow::Resu
     let client = Images::builder().build().await?;
 
     let description = format!(
-        "vLLM custom image with VLLM_ORG={}, VLLM_REPO={}, VLLM_BRANCH={}, LLMD_VERSION={}",
-        params.vllm_org, params.vllm_repo, params.vllm_branch, params.llmd_version
+        "vLLM custom image with VLLM_ORG={}, VLLM_REPO={}, VLLM_SHA={}, LLMD_VERSION={}",
+        params.vllm_org, params.vllm_repo, params.vllm_sha, params.llmd_version
     );
 
     eprintln!("Creating custom image: {}", params.image_name);
@@ -560,7 +562,7 @@ pub async fn create_image(args: ImageCreateArgs) -> anyhow::Result<String> {
     eprintln!("Configuration:");
     eprintln!("  VLLM_ORG: {}", args.vllm_org);
     eprintln!("  VLLM_REPO: {}", args.vllm_repo);
-    eprintln!("  VLLM_BRANCH: {}", args.vllm_branch);
+    eprintln!("  VLLM_SHA: {}", args.vllm_sha);
     eprintln!("  LLMD_VERSION: {}", args.llmd_version);
     eprintln!("  IMAGE_FAMILY: {}", args.image_family);
     eprintln!("  IMAGE_PROJECT: {}", project);
@@ -568,12 +570,12 @@ pub async fn create_image(args: ImageCreateArgs) -> anyhow::Result<String> {
 
     // Embed patch file at compile time based on LLMD version
     let patch_content = match args.llmd_version.as_str() {
-        "0.4.0" => {
-            include_bytes!("../../../docker/vllm/llm-d/patches/0.4.0/01-spans-llmd-vllm.patch.gz")
+        "0.5.0" => {
+            include_bytes!("../../../docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz")
         }
         _ => {
             return Err(anyhow::anyhow!(
-                "Unsupported LLMD version: {}. Only 0.4.0 is currently supported.",
+                "Unsupported LLMD version: {}. Only 0.5.0 is currently supported.",
                 args.llmd_version
             ));
         }
@@ -592,7 +594,7 @@ pub async fn create_image(args: ImageCreateArgs) -> anyhow::Result<String> {
             patch_content,
             &args.vllm_org,
             &args.vllm_repo,
-            &args.vllm_branch,
+            &args.vllm_sha,
         )
     };
 
@@ -626,7 +628,7 @@ pub async fn create_image(args: ImageCreateArgs) -> anyhow::Result<String> {
     let startup_script = generate_startup_script(
         &args.vllm_org,
         &args.vllm_repo,
-        &args.vllm_branch,
+        &args.vllm_sha,
         &patch_content_b64,
     );
 
@@ -654,7 +656,7 @@ pub async fn create_image(args: ImageCreateArgs) -> anyhow::Result<String> {
         project: &project,
         vllm_org: &args.vllm_org,
         vllm_repo: &args.vllm_repo,
-        vllm_branch: &args.vllm_branch,
+        vllm_sha: &args.vllm_sha,
         llmd_version: &args.llmd_version,
     })
     .await?;
@@ -701,8 +703,8 @@ mod tests {
             .force_overwrite(true)
             .vllm_org("test-org")
             .vllm_repo("test-repo")
-            .vllm_branch("test-branch")
-            .llmd_version("0.4.0")
+            .vllm_sha("abc123def456")
+            .llmd_version("0.5.0")
             .image_family("test-family")
             .build()
             .unwrap();
@@ -710,8 +712,8 @@ mod tests {
         assert!(args.force_overwrite);
         assert_eq!(args.vllm_org, "test-org");
         assert_eq!(args.vllm_repo, "test-repo");
-        assert_eq!(args.vllm_branch, "test-branch");
-        assert_eq!(args.llmd_version, "0.4.0");
+        assert_eq!(args.vllm_sha, "abc123def456");
+        assert_eq!(args.llmd_version, "0.5.0");
         assert_eq!(args.image_family, "test-family");
     }
 }
diff --git a/spnl/src/vllm/gce/up.rs b/spnl/src/vllm/gce/up.rs
index e55df6d1..fd1e5e93 100644
--- a/spnl/src/vllm/gce/up.rs
+++ b/spnl/src/vllm/gce/up.rs
@@ -71,7 +71,8 @@ fn load_cloud_config(args: &UpArgs) -> anyhow::Result<String> {
     };
     let vllm_org = &args.config.vllm_org;
     let vllm_repo = &args.config.vllm_repo;
-    let vllm_branch = &args.config.vllm_branch;
+    let vllm_sha = &args.config.vllm_sha;
+    let vllm_precompiled_wheel_commit = &args.config.vllm_precompiled_wheel_commit;
     let model = args
         .model
         .clone()
@@ -187,7 +188,11 @@ cloud_final_modules: []"#
     substitutions.insert("spnl_release", spnl_release.as_str());
     substitutions.insert("vllm_org", vllm_org.as_str());
     substitutions.insert("vllm_repo", vllm_repo.as_str());
-    substitutions.insert("vllm_branch", vllm_branch.as_str());
+    substitutions.insert("vllm_sha", vllm_sha.as_str());
+    substitutions.insert(
+        "vllm_precompiled_wheel_commit",
+        vllm_precompiled_wheel_commit.as_str(),
+    );
     substitutions.insert("model", model.as_str());
     substitutions.insert("packages_section", &packages_section);
     substitutions.insert("setup_dev_script", &setup_dev_script);
@@ -248,11 +253,19 @@ pub async fn up(args: UpArgs) -> anyhow::Result<()> {
     // Determine which image to use
     let source_image = if is_dev_mode {
         // Dev mode: use standard Ubuntu accelerator image
-        "projects/ubuntu-os-accelerator-images/global/images/ubuntu-accelerator-2404-amd64-with-nvidia-580-v20251210".to_string()
+        "projects/ubuntu-os-accelerator-images/global/images/ubuntu-accelerator-2404-amd64-with-nvidia-580-v20260203".to_string()
     } else {
         // Production mode: use custom image based on vLLM configuration
-        // Use the image family to get the latest image
-        format!("projects/{}/global/images/family/vllm-spnl", project)
+        // Generate the exact image name using the same logic as image creation
+        let patch_content =
+            include_bytes!("../../../docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz");
+        let image_name = super::image::generate_image_name(
+            patch_content,
+            &args.config.vllm_org,
+            &args.config.vllm_repo,
+            &args.config.vllm_sha,
+        );
+        format!("projects/{}/global/images/{}", project, image_name)
     };
 
     #[derive(Tabled)]
@@ -314,6 +327,15 @@ pub async fn up(args: UpArgs) -> anyhow::Result<()> {
         "v2-x86-template-1-4-0".to_string(),
     );
 
+    // Configure disk based on mode
+    // Dev mode needs more space for compilation (300GB pd-balanced)
+    // Production mode uses smaller, faster disk (100GB pd-ssd)
+    let (disk_size_gb, disk_type) = if is_dev_mode {
+        (300, format!("zones/{}/diskTypes/pd-balanced", zone))
+    } else {
+        (100, format!("zones/{}/diskTypes/pd-ssd", zone))
+    };
+
     // Create the instance configuration matching the terraform file
     let instance = Instance::new()
         .set_name(&instance_name)
@@ -325,8 +347,8 @@ pub async fn up(args: UpArgs) -> anyhow::Result<()> {
             .set_initialize_params(
                 AttachedDiskInitializeParams::new()
                     .set_source_image(&source_image)
-                    .set_disk_size_gb(100)
-                    .set_disk_type(format!("zones/{}/diskTypes/pd-ssd", zone)),
+                    .set_disk_size_gb(disk_size_gb)
+                    .set_disk_type(disk_type),
             )
             .set_mode("READ_WRITE")])
         .set_network_interfaces([NetworkInterface::new()
diff --git a/spnl/src/vllm/patch.rs b/spnl/src/vllm/patch.rs
index 2d3a7424..d424d660 100644
--- a/spnl/src/vllm/patch.rs
+++ b/spnl/src/vllm/patch.rs
@@ -1,7 +1,7 @@
 use std::io::{Read, Write};
 
 const PATCH_DATA: &[u8] =
-    include_bytes!("../../docker/vllm/llm-d/patches/0.4.0/01-spans-llmd-vllm.patch.gz");
+    include_bytes!("../../docker/vllm/llm-d/patches/0.5.0/01-spans-llmd-vllm.patch.gz");
 
 /// Emit the vLLM patchfile to stdout
 ///