diff --git a/.github/actions/install_neuronx_runtime/action.yml b/.github/actions/install_neuronx_runtime/action.yml
index b09df2f45..9444ed7e0 100644
--- a/.github/actions/install_neuronx_runtime/action.yml
+++ b/.github/actions/install_neuronx_runtime/action.yml
@@ -3,19 +3,104 @@ description: install system and python packages for an AWS Neuronx SDK version
 runs:
     using: "composite"
     steps:
+      - name: Set Neuronx deb variables
+        shell: bash
+        run: |
+          # Pinned Neuronx packages; the cache key is the SHA-256 of this exact string, exported with the list below.
+          NEURON_DEBS_LIST="aws-neuronx-tools=2.26.14.0 aws-neuronx-runtime-lib=2.28.23.0-dd5879008 aws-neuronx-collectives=2.28.27.0-bc30ece58"
+          NEURON_CACHE_KEY="$(printf '%s' "${NEURON_DEBS_LIST}" | sha256sum | awk '{print $1}')"
+          printf '%s\n' "APT_CACHE_DIR=/mnt/hf_cache/apt-cache" "NEURON_DEBS=${NEURON_DEBS_LIST}" "NEURON_CACHE_KEY=${NEURON_CACHE_KEY}" >> "$GITHUB_ENV"
+      - name: Prepare cache directory and download Neuronx system dependencies
+        shell: bash
+        run: |
+          CACHE_DIR_KEY="${APT_CACHE_DIR}/${NEURON_CACHE_KEY}"  # cache directory for this package set
+          LOCKDIR="${CACHE_DIR_KEY}.lock"  # sibling of the cache directory, not inside it
+          READY_MARKER="${CACHE_DIR_KEY}/.ready"  # created below, after the download command
+          LOCK_ACQUIRED=false
+          MAX_WAIT=300  # number of lock attempts, one per second (5 minutes timeout)
+          # Purpose: download the pinned debs into CACHE_DIR_KEY once; concurrent runs serialize on LOCKDIR
+          # Best effort: create the cache root and open its permissions; failures are ignored
+          sudo mkdir -p "${APT_CACHE_DIR}" 2>/dev/null || true
+          sudo chmod 777 "${APT_CACHE_DIR}" 2>/dev/null || true
+
+          # Try to acquire lock with timeout (mkdir is atomic even on NFS)
+          for i in $(seq 1 $MAX_WAIT); do
+            if mkdir "${LOCKDIR}" 2>/dev/null; then
+              LOCK_ACQUIRED=true
+              echo "🔒 Lock acquired (attempt $i)"
+
+              # Remove the lock directory if the script exits before the explicit release below
+              trap "rmdir '${LOCKDIR}' 2>/dev/null || true" EXIT
+
+              # Build the cache only if no earlier run left the ready marker
+              if [ ! -f "${READY_MARKER}" ]; then
+                echo "📦 Downloading Neuronx system dependencies..."
+                sudo mkdir -p "${CACHE_DIR_KEY}"
+                sudo chmod 777 "${CACHE_DIR_KEY}"
+                . /etc/os-release  # sourced for VERSION_CODENAME, used in the repository line below
+                sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
+          deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
+          EOF
+                wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
+                sudo apt-get update -qq  # the install below only downloads, into CACHE_DIR_KEY
+                sudo apt-get install -y --download-only \
+                  -o Dir::Cache::archives="${CACHE_DIR_KEY}" \
+                  ${NEURON_DEBS}
+                touch "${READY_MARKER}"  # the install step only uses the cache when this file exists
+                echo "✅ Download complete: ${CACHE_DIR_KEY}"
+
+              else
+                echo "✅ Cache already ready (another runner downloaded it)"
+              fi
+
+              # Release lock, then clear the EXIT trap since the lock is already gone
+              rmdir "${LOCKDIR}" 2>/dev/null || true
+              trap - EXIT
+              break
+            fi
+
+            # Lock not acquired, wait and retry (the waiting message is printed on the first attempt only)
+            if [ $i -eq 1 ]; then
+              echo "⏳ Waiting for another runner to complete download..."
+            fi
+            sleep 1
+          done
+
+          if [ "$LOCK_ACQUIRED" = "false" ]; then
+            echo "⚠️ Lock timeout after ${MAX_WAIT}s - will install directly from apt"
+          fi
       - name: Install Neuronx system packages
         shell: bash
         run: |
-          . /etc/os-release
-          sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
+          CACHE_DIR_KEY="${APT_CACHE_DIR}/${NEURON_CACHE_KEY}"
+          READY_MARKER="${CACHE_DIR_KEY}/.ready"
+          # A ready cache may hold no .deb (apt downloads nothing for packages already installed): use apt then.
+          if [ -f "${READY_MARKER}" ] && compgen -G "${CACHE_DIR_KEY}/*.deb" > /dev/null; then
+            echo "📦 Installing from cache: ${CACHE_DIR_KEY}"
+
+            # Show packages being installed in a collapsible group
+            echo "::group::📋 Packages to install"
+            cd "${CACHE_DIR_KEY}"
+            ls -lh *.deb || echo "❌ No .deb files found"
+            echo "📊 Total: $(ls -1 *.deb 2>/dev/null | wc -l) packages"
+            echo "::endgroup::"
+
+            sudo dpkg -i "${CACHE_DIR_KEY}"/*.deb
+          else
+            echo "📦 Cache not available, installing directly from apt..."
+            . /etc/os-release
+            sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
           deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
           EOF
-          wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
-          sudo apt-get update -y
-          sudo apt-get install aws-neuronx-tools=2.26.14.0 aws-neuronx-runtime-lib=2.28.23.0-dd5879008 aws-neuronx-collectives=2.28.27.0-bc30ece58 -y
+            wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
+            sudo apt-get update -qq
+            sudo apt-get install -y ${NEURON_DEBS}
+          fi
+          # Expose the Neuron tools to this step (export) and to later steps (GITHUB_PATH)
           export PATH=/opt/aws/neuron/bin:$PATH
+          echo "/opt/aws/neuron/bin" >> "$GITHUB_PATH"
           dpkg -l | grep neuron
       - name: Display driver version
         shell: bash
         run: |
-          apt show aws-neuronx-dkms
+          cat /sys/module/neuron/version || echo "Neuron driver version not available"  # if the file cannot be read, print a notice instead of failing
diff --git a/.github/workflows/cleanup_inf2_cache.yml b/.github/workflows/cleanup_inf2_cache.yml
new file mode 100644
index 000000000..50250a302
--- /dev/null
+++ b/.github/workflows/cleanup_inf2_cache.yml
@@ -0,0 +1,34 @@
+# Workflow that cleans up the Inf2 apt cache.
+# Triggered manually or by the scheduler, it removes the cached package directories under /mnt/hf_cache/apt-cache.
+name: Cleanup Inf2 Cache
+on:
+  workflow_dispatch:
+  schedule:
+    # Midnight UTC on every odd day of the month (1, 3, ..., 31)
+    - cron: '0 0 */2 * *'
+
+jobs:
+    do-the-job:
+      name: Apt cache cleanup
+      runs-on:
+        group: aws-inf2-8xlarge
+      steps:
+        - name: Apt cache cleanup
+          run: |
+            # Cache entries are <key>/ directories; the install_neuronx_runtime action
+            # holds a sibling <key>.lock/ directory while it downloads into one.
+            cache_root=/mnt/hf_cache/apt-cache
+            [[ -d "$cache_root" ]] || exit 0
+            # Expire locks older than an hour; presumably their owner is gone (installers wait 5 minutes at most).
+            sudo find "$cache_root" -maxdepth 1 -type d -name '*.lock' -mmin +60 -exec rmdir {} +
+            for dir in "$cache_root"/*; do
+              # Lock directories are not cache entries and are never removed by this loop
+              [[ -d "$dir" && "$dir" != *.lock ]] || continue
+              size=$(sudo du -sh "$dir" | awk '{print $1}')
+              if [[ -d "${dir}.lock" ]]; then
+                echo "Keeping $dir ($size) that is locked"
+              else
+                echo "Removing $dir ($size)"
+                sudo rm -rf "$dir"
+              fi
+            done
diff --git a/.gitignore b/.gitignore
index dc61514d0..b7cc22ab8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -134,3 +134,6 @@ dmypy.json
 
 .vscode
 neuronxcc*/
+
+# Ignore claude settings
+.claude/
diff --git a/tests/decoder/test_decoder_export.py b/tests/decoder/test_decoder_export.py
index 46871d578..7e3b3b9a4 100644
--- a/tests/decoder/test_decoder_export.py
+++ b/tests/decoder/test_decoder_export.py
@@ -106,6 +106,7 @@ def test_decoder_export_hub_models(
     _test_decoder_export_save_reload(model_id=any_decoder_model, is_local=False, load_weights=False)
 
 
+# TODO: remove this comment, it is just a test for CI
 @pytest.mark.parametrize("is_local", [True, False], ids=["local", "from_hub"])
 @pytest.mark.parametrize("load_weights", [True, False], ids=["with-weights", "without-weights"])
 def test_decoder_export_save_reload(
diff --git a/tests/vllm/service/test_vllm_agentic.py b/tests/vllm/service/test_vllm_agentic.py
index dde0db934..53d0a89f6 100644
--- a/tests/vllm/service/test_vllm_agentic.py
+++ b/tests/vllm/service/test_vllm_agentic.py
@@ -44,6 +44,7 @@ async def greedy_with_tools(
     return generated_tokens, generated_text, tool_calls
 
 
+# TODO: remove this comment, it is just a test for CI
 # Note: we use Qwen3-0.6B as a test model because it is a small model that is easy to test and it supports tool calling.
 @pytest.mark.asyncio
 @pytest.mark.parametrize("neuron_llm_config", ["qwen3-1x8192"], indirect=True)