Changes from all commits (44 commits)
e0fc889
Revert "[BUGFIX] [P2PRDMA] Add rollout post-processing after P2PRDMA …
JD-ETH Apr 5, 2026
ef5dda6
[Fix] fix ci (#894)
yushengsu-thu Apr 5, 2026
a3db3a9
Avoid threading for ray getting object (#886)
fzyzcjy Apr 5, 2026
4dd7770
Add explicit errors for unsupported Megatron profiles (#887)
fzyzcjy Apr 5, 2026
649a353
Add nvfp4 quantizer files (#907)
zianglih Apr 6, 2026
3572922
Bump flash-linear-attention version to 0.4.2 (#892)
Zhichenzzz Apr 6, 2026
8146a78
[BUGFIX] Invoke "post_process_quantization" by default after weight u…
JensenFire Apr 7, 2026
eaa36a2
Add heartbeat and id to session server (#866)
maocheng23 Apr 7, 2026
70dc402
fix: adding thin glm5 image to docker build + latest tag sync (#871)
dougyster Apr 7, 2026
c198efa
Add consistent hashing routing policy for rollout (#891)
yueming-yuan Apr 7, 2026
afc5b55
[example] add retool v2 example with multi-turn framework interfaces …
PopSoda2002 Apr 7, 2026
4db9bfe
Expose rollout-batch-size, n-samples-per-prompt, global-batch-size as…
Shi-Dong Apr 7, 2026
6b58ebd
chore: remove obsolete swe-agent server.py and run-qwen3.sh (#952)
guapisolo Apr 8, 2026
41615af
Add weight staleness control for fully async rollout (#958)
maocheng23 Apr 9, 2026
94dbb8f
Fix/pause generation mode (#924)
maocheng23 Apr 9, 2026
4d8b007
[v0.5.10][1] Bump sglang to v0.5.10 (#898)
yueming-yuan Apr 9, 2026
ef228e6
[v0.5.10][2] Fix apply_chat_template behavior for transformers >=5.0 …
yueming-yuan Apr 9, 2026
b1a4346
[v0.5.10][3] Fix processor return_tensors duplicate kwarg for transfo…
yueming-yuan Apr 9, 2026
2a99108
[v0.5.10][4] Fix _no_split_modules set not subscriptable in transform…
yueming-yuan Apr 9, 2026
c74392d
[v0.5.10][5] Disable piecewise cuda graph to avoid NVLS oom (#935)
yueming-yuan Apr 9, 2026
d6158f8
[v0.5.10][6][FSDP] fix outdated weight update logic in FSDP (#948)
yueming-yuan Apr 9, 2026
c4e50c8
[v0.5.10][7][FSDP] move FSDP to experimental and disable by default (…
yueming-yuan Apr 9, 2026
8d66ac1
Add skiplist and more robust calculation on val (#965)
maocheng23 Apr 9, 2026
02f6e05
[fix] tiny fix debug rollout only in weight version check (#967)
yueming-yuan Apr 10, 2026
eb294e3
feat: real cp support with relayout fix for qwen3.5 train/rollout mis…
Zhichenzzz Apr 11, 2026
82bf196
[AMD] Upgrade to sglv0.5.10 (#973)
zyzshishui Apr 13, 2026
ef7481a
switch model to actor (#756)
maocheng23 Apr 13, 2026
85fe651
[fix] support general logic to bypass fp32 downcast and fix qwen35 A_…
guapisolo Apr 14, 2026
6cc3feb
fix: populate prefix_cache_info in OpenAI/session rollout path (#960)
guapisolo Apr 14, 2026
6706c73
Remove prepare_harbor_tasks.py; use harbor-private adapters (#982)
Shi-Dong Apr 14, 2026
f144961
[fix] Skip flush_cache in in_place mode and add fully async example (…
maocheng23 Apr 15, 2026
c271e14
GLM47 full cmd for async and sync reasoning (#986)
maocheng23 Apr 16, 2026
5d11fe2
fix: handle non-tool appended messages in TITO incremental tokenizati…
guapisolo Apr 19, 2026
ad01e63
[docker] Add sgl-model-gateway install and download .tar.gz assets (#…
guapisolo Apr 20, 2026
3270915
[ci] fix hf rate limit error by caching tokenizer loading (#1014)
guapisolo Apr 20, 2026
9a00364
Use load_generate_function in legacy sglang_rollout path (#1016)
maocheng23 Apr 20, 2026
cd41b28
Update CODEOWNERS to add new reviewers (#1021)
Ying1123 Apr 20, 2026
38f9183
Support moe lora for gpt-oss (#798)
gongyisheng Apr 20, 2026
641f071
[fix] restore expert_bias to fp32 before bridge weight export (#811)
yueming-yuan Apr 21, 2026
252fbec
[chore] drop legacy transformers upgrade pin for glm47-flash and qwen…
guapisolo Apr 21, 2026
5cc643f
[fix] Enforce param dtype before wrap ddp (#992)
guapisolo Apr 21, 2026
99956c0
[upgrade] update Megatron-Bridge source and LoRA CI to megatron e2e t…
yushengsu-thu Apr 21, 2026
85fdb7e
[CI] Drop --use-miles-router from R3 tests and add r3 comparasion tes…
guapisolo Apr 21, 2026
1f58c11
tito: support agent-layer-inserted assistant messages in append segme…
DavidBellamy Apr 21, 2026
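Commit c198efa above adds a consistent hashing routing policy for rollout (#891). The general technique can be sketched as follows — an illustrative hash ring with virtual nodes, not the repository's actual implementation (all names here are hypothetical):

```python
import bisect
import hashlib

class HashRing:
    """Minimal consistent-hash ring with virtual nodes.

    Keys (e.g. prompt or session ids) map to the first node clockwise from
    their hash, so adding or removing one rollout worker only remaps the
    keys that lived on that worker.
    """

    def __init__(self, nodes=(), vnodes=64):
        self.vnodes = vnodes
        self._ring = []  # sorted list of (hash, node) pairs
        for n in nodes:
            self.add(n)

    @staticmethod
    def _hash(key: str) -> int:
        # md5 keeps the sketch deterministic across processes
        return int.from_bytes(hashlib.md5(key.encode()).digest()[:8], "big")

    def add(self, node: str):
        for i in range(self.vnodes):
            bisect.insort(self._ring, (self._hash(f"{node}#{i}"), node))

    def remove(self, node: str):
        self._ring = [(h, n) for h, n in self._ring if n != node]

    def route(self, key: str) -> str:
        h = self._hash(key)
        idx = bisect.bisect(self._ring, (h, ""))
        return self._ring[idx % len(self._ring)][1]
```

The stability property is the point: after removing a worker, every key that was not on that worker still routes to the same node.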
4 changes: 2 additions & 2 deletions .github/CODEOWNERS
@@ -6,6 +6,6 @@
/miles/backends/sglang_utils/ @fzyzcjy @yueming-yuan @maocheng23 @yushengsu-thu
/miles/ray/ @fzyzcjy @yueming-yuan @maocheng23
/miles/rollout/ @fzyzcjy @yueming-yuan @guapisolo
/miles/rollout/session/ @fzyzcjy @yueming-yuan @guapisolo @maocheng23
/miles/rollout/session/ @fzyzcjy @yueming-yuan @guapisolo @maocheng23 @jybsuper
/miles/router/ @fzyzcjy @yueming-yuan @guapisolo
/miles/utils/ @fzyzcjy @yueming-yuan @guapisolo @maocheng23
/miles/utils/ @fzyzcjy @yueming-yuan @guapisolo @maocheng23 @jybsuper
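The two modified lines add @jybsuper as an owner for the session and utils paths. CODEOWNERS resolution gives later entries precedence, so a more specific directory rule placed after a broader one wins. A simplified sketch of that matching rule (real CODEOWNERS supports full gitignore-style globs; this only handles directory prefixes and simple patterns):

```python
import fnmatch

def owners_for(path, rules):
    """Return owners from the last matching rule, mirroring GitHub's
    last-match-wins CODEOWNERS semantics. Directory patterns such as
    '/miles/rollout/session/' match everything beneath them."""
    owners = []
    for pattern, rule_owners in rules:
        if pattern.endswith("/"):
            if path.startswith(pattern.lstrip("/")):
                owners = rule_owners
        elif fnmatch.fnmatch(path, pattern.lstrip("/")):
            owners = rule_owners
    return owners

# Rules in file order, as in the diff above:
rules = [
    ("/miles/rollout/", ["@fzyzcjy", "@yueming-yuan", "@guapisolo"]),
    ("/miles/rollout/session/",
     ["@fzyzcjy", "@yueming-yuan", "@guapisolo", "@maocheng23", "@jybsuper"]),
]
print(owners_for("miles/rollout/session/server.py", rules))
```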
35 changes: 35 additions & 0 deletions .github/workflows/docker-build.yml
@@ -146,6 +146,11 @@ jobs:
${{ inputs.custom_tag && format('--custom-tag {0}', inputs.custom_tag) || '' }} \
--push

- name: Point latest to current dev
if: github.event_name == 'schedule' || inputs.simulate_schedule == true
run: |
docker buildx imagetools create -t radixark/miles:latest radixark/miles:dev

- name: Prune old dev tags
if: github.event_name == 'schedule'
run: |
@@ -193,3 +198,33 @@ jobs:
echo " Failed to delete ${TAG} (HTTP ${HTTP_CODE})"
fi
done
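The prune step (partially elided above) deletes stale dev tags through Docker Hub's v2 API and logs the HTTP code on failure. A sketch of the per-tag DELETE request it would issue — repository, tag, and token values are placeholders, and the endpoint shape follows Docker Hub's v2 repository-tags API:

```python
import urllib.request

DOCKERHUB_API = "https://hub.docker.com/v2"

def delete_tag_request(repo: str, tag: str, token: str) -> urllib.request.Request:
    """Build the DELETE request the prune loop issues for each old dev tag.
    The workflow itself inspects the returned HTTP status and prints a
    failure line like the one visible in the diff."""
    return urllib.request.Request(
        f"{DOCKERHUB_API}/repositories/{repo}/tags/{tag}/",
        method="DELETE",
        headers={"Authorization": f"Bearer {token}"},
    )

# Hypothetical tag name for illustration:
req = delete_tag_request("radixark/miles", "dev-20260401", "<token>")
print(req.get_method(), req.full_url)
```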

build-and-push-dev-glm:
needs: [build-and-push]
# Only rebuild dev-glm when the dev image was built (schedule, push to main, or dispatch with image_tag=dev)
if: needs.build-and-push.result == 'success' && (github.event_name == 'schedule' || inputs.simulate_schedule == true)
runs-on: self-hosted
steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
driver-opts: |
image=moby/buildkit:latest
network=host

- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Build and push dev-glm
run: |
docker buildx build \
-f docker/glm5/Dockerfile.dev-glm \
-t radixark/miles:dev-glm \
--push \
.
134 changes: 17 additions & 117 deletions .github/workflows/pr-test.yml

Large diffs are not rendered by default.

21 changes: 9 additions & 12 deletions .github/workflows/pr-test.yml.j2
@@ -1,10 +1,11 @@
<% set default_image = 'radixark/miles:dev' %>

<% set fsdp_tests = [
{'test_file': 'e2e/fsdp/test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
{'test_file': 'e2e/fsdp/test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
{'test_file': 'e2e/fsdp/test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 8},
{'test_file': 'e2e/fsdp/test_qwen3_0.6B_megatron_fsdp_align.py', 'num_gpus': 8},
{'name': '[FSDP] qwen3-4B-fsdp-true-on-policy', 'test_file': 'e2e/fsdp/test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 8},
{'name': '[FSDP] qwen3-vl-4B-fsdp', 'test_file': 'e2e/fsdp/test_qwen3_vl_4B_fsdp.py', 'num_gpus': 8},
{'name': '[FSDP] qwen3-0.6B-fsdp-distributed', 'test_file': 'e2e/fsdp/test_qwen3_0.6B_fsdp_distributed.py', 'num_gpus': 8},
{'name': '[FSDP] qwen3-0.6B-megatron-fsdp-align', 'test_file': 'e2e/fsdp/test_qwen3_0.6B_megatron_fsdp_align.py', 'num_gpus': 8},
{'name': '[FSDP] qwen3-0.6B-fsdp-colocated-2xGPU', 'test_file': 'e2e/short/test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 8},
] %>
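The new entries attach explicit `name` keys where the old list relied on the test file path alone. A CI generator consuming this list might fall back to the file's basename when `name` is absent — a hypothetical helper for illustration, not code from this repository:

```python
def matrix_entry(test: dict) -> dict:
    """Build a CI matrix entry, preferring an explicit 'name' key (as the
    diff now adds for the FSDP tests) and otherwise deriving a display name
    from the test file's basename."""
    fallback = test["test_file"].rsplit("/", 1)[-1].removesuffix(".py")
    return {"name": test.get("name", fallback), **test}

print(matrix_entry({"test_file": "e2e/fsdp/test_qwen3_vl_4B_fsdp.py",
                    "num_gpus": 8}))
```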

<% set megatron_tests = [
@@ -27,7 +28,6 @@
<% set short_tests = [
{'test_file': 'e2e/short/test_qwen2.5_0.5B_gsm8k_async_short.py', 'num_gpus': 8},
{'test_file': 'e2e/short/test_qwen2.5_0.5B_gsm8k_short.py', 'num_gpus': 8},
{'test_file': 'e2e/short/test_qwen3_0.6B_fsdp_colocated_2xGPU.py', 'num_gpus': 8},
{'test_file': 'e2e/sglang_config/test_sglang_config.py', 'num_gpus': 8},
{'test_file': 'e2e/sglang_config/test_sglang_config_mixed_offload.py', 'num_gpus': 8},
{'test_file': 'e2e/sglang_config/test_sglang_config_mixed_offload_ft.py', 'num_gpus': 8},
@@ -67,12 +67,6 @@
{'test_file': 'utils/test_sglang_config.py', 'num_gpus': 0},
],
},
'unit-test': {
'label': 'run-unit-test',
'tests': [
{'test_file': 'e2e/fsdp/test_qwen3_4B_fsdp_true_on_policy.py', 'num_gpus': 8}
],
},
'e2e-test-sglang': {
'label': 'run-ci-sglang',
'test_executor': 'pytest',
@@ -82,6 +76,8 @@
{'test_file': 'e2e/sglang/test_session_server_tool_call.py', 'num_gpus': 1, 'model_family': 'glm47'},
{'test_file': 'e2e/sglang/test_tito_logprob_equivalence.py', 'num_gpus': 1, 'model_family': 'qwen3'},
{'test_file': 'e2e/sglang/test_tito_logprob_equivalence.py', 'num_gpus': 1, 'model_family': 'glm47'},
{'test_file': 'e2e/sglang/test_r3_router_equivalence.py', 'num_gpus': 1, 'model_family': 'qwen3_30b_a3b'},
{'test_file': 'e2e/sglang/test_r3_router_equivalence.py', 'num_gpus': 1, 'model_family': 'glm47_flash'},
],
},
'e2e-test-short': {
@@ -94,7 +90,7 @@
},
'e2e-test-megatron': {
'label': 'run-ci-megatron',
'tests': megatron_tests,
'tests': megatron_tests + lora_tests,
},
'e2e-test-precision': {
'label': 'run-ci-precision',
@@ -197,6 +193,7 @@ jobs:
MILES_TEST_ENABLE_EVAL: ${{ matrix.info.enable_eval || '1' }}
MILES_TEST_FEW_GPU: '0'
SESSION_TEST_MODEL_FAMILY: ${{ matrix.info.model_family || '' }}
ROUTER_EQ_MODEL_FAMILY: ${{ matrix.info.model_family || '' }}

steps:
- name: Checkout repository
44 changes: 38 additions & 6 deletions docker/Dockerfile
@@ -3,10 +3,10 @@
#
# 2. radixark/miles:dev-cu13-arm64
# build-arg:ENABLE_CUDA_13=1 \
# build-arg:SGLANG_IMAGE_TAG=v0.5.9-cu130-arm64 \
# build-arg:SGLANG_IMAGE_TAG=v0.5.10-cu130 \
# build-arg:WHEELS_TAG=cu130-aarch64 \

ARG SGLANG_IMAGE_TAG=v0.5.9
ARG SGLANG_IMAGE_TAG=v0.5.10
FROM lmsysorg/sglang:${SGLANG_IMAGE_TAG} AS sglang

# ======================================== Arguments =============================================
@@ -46,7 +46,7 @@ RUN mkdir -p /tmp/wheels && \
curl -sL "https://api.github.com/repos/${WHEELS_REPO}/releases/tags/${WHEELS_TAG}" \
| python3 -c "import sys, json, subprocess; \
[subprocess.run(['curl', '-fSL', '-o', '/tmp/wheels/' + a['name'], a['browser_download_url']], check=True) \
for a in json.load(sys.stdin)['assets'] if a['name'].endswith('.whl')]" && \
for a in json.load(sys.stdin)['assets'] if a['name'].endswith(('.whl', '.tar.gz'))]" && \
ls -lh /tmp/wheels/
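The inline `python3 -c` filter above now keeps both `.whl` and `.tar.gz` release assets; the `.tar.gz` case covers the sgl-model-gateway binary installed later in this Dockerfile. Expanded into a readable standalone sketch, with the download step reduced to returning URLs and a stubbed release payload in place of the GitHub API response:

```python
import json

def select_release_assets(release_json: str, suffixes=(".whl", ".tar.gz")):
    """Return (name, url) pairs for release assets whose names match the
    given suffixes — the same filter the Dockerfile applies before invoking
    curl on each asset."""
    release = json.loads(release_json)
    return [
        (a["name"], a["browser_download_url"])
        for a in release["assets"]
        if a["name"].endswith(tuple(suffixes))
    ]

# Stubbed release payload (asset names are illustrative):
payload = json.dumps({
    "assets": [
        {"name": "flash_attn_3-2.7.whl",
         "browser_download_url": "https://example.com/fa3.whl"},
        {"name": "sgl-model-gateway-linux-x86_64.tar.gz",
         "browser_download_url": "https://example.com/gw.tar.gz"},
        {"name": "checksums.txt",
         "browser_download_url": "https://example.com/sums.txt"},
    ]
})
print(select_release_assets(payload))
```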

# ====================================== Python dependencies ============================================
Expand All @@ -63,7 +63,7 @@ RUN pip install /tmp/wheels/flash_attn_3-*.whl && \

RUN pip install git+https://github.com/ISEEKYAN/mbridge.git@89eb10887887bc74853f89a4de258c0702932a1c --no-deps

RUN pip install flash-linear-attention==0.4.1
RUN pip install flash-linear-attention==0.4.2
RUN pip install tilelang -f https://tile-ai.github.io/whl/nightly/cu128/

RUN if [ "${ENABLE_CUDA_13}" = "1" ]; then \
Expand All @@ -83,12 +83,12 @@ RUN git clone https://github.com/${MEGATRON_REPO}.git --recursive -b ${MEGATRON_
RUN pip install git+https://github.com/fzyzcjy/torch_memory_saver.git@d64a639 --no-cache-dir --force-reinstall
# RUN pip install git+https://github.com/fzyzcjy/Megatron-Bridge.git@dev_rl --no-build-isolation
RUN pip install "nvidia-modelopt[torch]>=0.37.0" --no-build-isolation
RUN pip install git+https://github.com/yushengsu-thu/Megatron-Bridge.git@merged-megatron-0.16.0rc0-miles --no-deps --no-build-isolation
RUN pip install git+https://github.com/radixark/Megatron-Bridge.git@bridge --no-deps --no-build-isolation
RUN pip install megatron-energon --no-deps
RUN pip install multi-storage-client --no-deps

COPY requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt
RUN rm -rf /usr/lib/python3/dist-packages/jwt /usr/lib/python3/dist-packages/PyJWT* && pip install -r /tmp/requirements.txt

# https://github.com/pytorch/pytorch/issues/168167
RUN if [ "${ENABLE_CUDA_13}" = "1" ]; then \
@@ -125,4 +125,36 @@ RUN git clone https://github.com/radixark/miles.git /root/miles && \
# int4_qat
RUN pip install /tmp/wheels/fake_int4_quant_cuda-*.whl

# ====================================== Install sgl-model-gateway ============================================
# SGL_ROUTER_USE_WHEELS=0:
# Build from source https://github.com/radixark/sgl-router-for-miles
# SGL_ROUTER_USE_WHEELS=1 (default):
# Install the pre-built sgl-model-gateway wheel

ARG SGL_ROUTER_USE_WHEELS=1
ARG SGL_ROUTER_REPO=https://github.com/radixark/sgl-router-for-miles.git
ARG SGL_ROUTER_BRANCH=main

RUN --mount=type=cache,target=/root/.cache/pip \
set -eux; \
if [ "${SGL_ROUTER_USE_WHEELS}" = "1" ]; then \
pip install --force-reinstall /tmp/wheels/sglang_router-*.whl && \
tar xzf /tmp/wheels/sgl-model-gateway-linux-*.tar.gz -C /usr/local/bin/ && \
chmod +x /usr/local/bin/sgl-model-gateway; \
elif [ "${SGL_ROUTER_USE_WHEELS}" = "0" ]; then \
git clone --branch "${SGL_ROUTER_BRANCH}" --depth 1 "${SGL_ROUTER_REPO}" /build/sgl-model-gateway && \
curl --proto '=https' --tlsv1.2 --retry 3 --retry-delay 2 -sSf https://sh.rustup.rs | sh -s -- -y && \
export PATH="/root/.cargo/bin:${PATH}" && \
python3 -m pip install maturin && \
cd /build/sgl-model-gateway/bindings/python && \
ulimit -n 65536 && \
maturin build --release --features vendored-openssl --out /build/gateway_wheels && \
cd /build/sgl-model-gateway && \
cargo build --release --bin sgl-model-gateway --features vendored-openssl && \
cp target/release/sgl-model-gateway /usr/local/bin/sgl-model-gateway && \
chmod +x /usr/local/bin/sgl-model-gateway && \
pip install --force-reinstall /build/gateway_wheels/sglang_router-*.whl && \
rm -rf /root/.cargo /root/.rustup /build/sgl-model-gateway /build/gateway_wheels; \
fi

RUN rm -rf /tmp/wheels