THUDM · zhuzilin · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026 · Mar 27, 2026
diff --git a/docs/en/advanced/slime-router.md b/docs/en/advanced/slime-router.md
diff --git a/docs/en/get_started/customization.md b/docs/en/get_started/customization.md
@@ -406,7 +406,7 @@ Stabilize MoE RL training by recording and replaying expert routing decisions to
 | Argument | Description |
 | --- | --- |
 | `--use-routing-replay` | Forward-backward routing consistency in training. ([arXiv:2507.18071](https://arxiv.org/abs/2507.18071)) |
-| `--use-rollout-routing-replay` | R3: Replay routing from rollout during training. Works with both slime router and SGLang Model Gateway. ([arXiv:2510.11370](https://arxiv.org/abs/2510.11370)) |
+| `--use-rollout-routing-replay` | R3: Replay routing from rollout during training. Supported by slime's default `sglang_router` path. ([arXiv:2510.11370](https://arxiv.org/abs/2510.11370)) |
 
 ## Testing Custom Function Paths
 

diff --git a/docs/en/index.rst b/docs/en/index.rst
@@ -49,7 +49,6 @@ slime is the RL-framework behind GLM-4.7, GLM-4.6 and GLM-4.5. Apart from models
    advanced/pd-disaggregation.md
    advanced/sglang-config.md
    advanced/arch-support-beyond-megatron.md
-   advanced/slime-router.md
 
 .. toctree::
    :maxdepth: 1

diff --git a/docs/zh/advanced/slime-router.md b/docs/zh/advanced/slime-router.md
diff --git a/docs/zh/get_started/customization.md b/docs/zh/get_started/customization.md
@@ -408,7 +408,7 @@ def custom_hook(args, rollout_id, step_id, model, optimizer, opt_param_scheduler
 | 参数 | 说明 |
 | --- | --- |
 | `--use-routing-replay` | 训练中前向-反向路由一致性。([arXiv:2507.18071](https://arxiv.org/abs/2507.18071)) |
-| `--use-rollout-routing-replay` | R3：在训练时重放 rollout 阶段的路由。slime router 和 SGLang Model Gateway 均支持。([arXiv:2510.11370](https://arxiv.org/abs/2510.11370)) |
+| `--use-rollout-routing-replay` | R3：在训练时重放 rollout 阶段的路由。slime 默认的 `sglang_router` 路径支持该功能。([arXiv:2510.11370](https://arxiv.org/abs/2510.11370)) |
 
 ## 自定义函数路径的测试
 

diff --git a/docs/zh/index.rst b/docs/zh/index.rst
@@ -49,7 +49,6 @@ slime 是 GLM-4.7、GLM-4.6、GLM-4.5 背后的 RL 训练框架。除此之外
    advanced/pd-disaggregation.md
    advanced/sglang-config.md
    advanced/arch-support-beyond-megatron.md
-   advanced/slime-router.md
 
 .. toctree::
    :maxdepth: 1

diff --git a/scripts/low_precision/run-kimi-k2-Thinking-int4.sh b/scripts/low_precision/run-kimi-k2-Thinking-int4.sh
@@ -135,7 +135,6 @@ SGLANG_ARGS=(
 
    # make every dp rank have 128 concurrency
    --sglang-server-concurrency 1024
-   --use-slime-router
 )
 
 

diff --git a/scripts/low_precision/run-qwen3-235B-A22B-int4.sh b/scripts/low_precision/run-qwen3-235B-A22B-int4.sh
@@ -119,7 +119,6 @@ SGLANG_ARGS=(
   #  --sglang-dp-size 4
    --sglang-ep-size 8
    --sglang-cuda-graph-bs 1 2 4 8 $(seq 16 8 256)
-   --use-slime-router
 )
 
 

diff --git a/scripts/low_precision/run-qwen3-30B-A3B-int4.sh b/scripts/low_precision/run-qwen3-30B-A3B-int4.sh
@@ -114,7 +114,6 @@ SGLANG_ARGS=(
    --rollout-num-gpus-per-engine 1
    --sglang-mem-fraction-static 0.7
    --sglang-cuda-graph-bs 1 2 4 8 $(seq 16 8 256)
-   --use-slime-router
 )
 
 MISC_ARGS=(

diff --git a/scripts/low_precision/run-qwen3-30b-a3b-fp8.sh b/scripts/low_precision/run-qwen3-30b-a3b-fp8.sh
@@ -127,7 +127,6 @@ SGLANG_ARGS=(
    --sglang-mem-fraction-static 0.6
    --sglang-cuda-graph-bs 1 2 4 8 $(seq 16 8 256)
    --sglang-expert-parallel-size 8
-   --use-slime-router
    # --use-rollout-routing-replay
 )
 

diff --git a/scripts/run-qwen2.5-0.5B-reproducibility.sh b/scripts/run-qwen2.5-0.5B-reproducibility.sh
@@ -126,7 +126,6 @@ ray job submit --address="http://127.0.0.1:8265" \
    --actor-num-gpus-per-node 8 \
    --colocate \
    --calculate-per-token-loss \
-   --use-slime-router \
    ${MODEL_ARGS[@]} \
    ${CKPT_ARGS[@]} \
    ${ROLLOUT_ARGS[@]} \

diff --git a/slime/backends/sglang_utils/sglang_engine.py b/slime/backends/sglang_utils/sglang_engine.py
@@ -195,7 +195,7 @@ def _init_normal(self, server_args_dict):
             return
 
         if self.node_rank == 0 and self.router_ip and self.router_port:
-            if not self.args.use_slime_router and parse(sglang_router.__version__) <= parse("0.2.1"):
+            if parse(sglang_router.__version__) <= parse("0.2.1"):
                 assert self.worker_type == "regular", "pd disaggregation is not supported in old router."
                 response = requests.post(
                     f"http://{self.router_ip}:{self.router_port}/add_worker?url=http://{self.server_host}:{self.server_port}"
@@ -315,7 +315,7 @@ def shutdown(self):
         if self.worker_type != "encoder" and self.node_rank == 0:
             worker_url = f"http://{self.server_host}:{self.server_port}"
             response = None
-            if self.args.use_slime_router or parse(sglang_router.__version__) <= parse("0.2.1"):
+            if parse(sglang_router.__version__) <= parse("0.2.1"):
                 response = requests.post(
                     f"http://{self.router_ip}:{self.router_port}/remove_worker?url=http://{self.server_host}:{self.server_port}"
                 )

diff --git a/slime/ray/rollout.py b/slime/ray/rollout.py
@@ -398,16 +398,7 @@ def _get_metrics_router_addr(self) -> str | None:
         which aggregates Prometheus metrics from all backend sglang servers.
         Returns ``http://{ip}:{port}`` for the first server, or ``None`` when
         metrics are disabled or no servers are running.
-
-        Note: the ``use_slime_router`` path does not expose ``/engine_metrics``;
-        metrics forwarding to W&B requires the sglang_router gateway.
         """
-        if getattr(self.args, "use_slime_router", False):
-            logger.warning(
-                "SGLang metrics forwarding to W&B is not supported with --use-slime-router. "
-                "Use the default sglang_router gateway for /engine_metrics aggregation."
-            )
-            return None
         srv = self.server
         if srv is None or srv.router_ip is None:
             return None
@@ -914,7 +905,7 @@ def addr():
 
 
 def _start_router(args, *, has_pd_disaggregation: bool = False, force_new: bool = False) -> tuple[str, int]:
-    """Start sgl router or slime router and return (router_ip, router_port).
+    """Start sglang_router and return (router_ip, router_port).
 
     If ``args.sglang_router_ip`` is already set (e.g. by the user) and
     ``force_new`` is False, skip launching and return the existing values.
@@ -931,37 +922,28 @@ def _start_router(args, *, has_pd_disaggregation: bool = False, force_new: bool
         if router_port is None:
             router_port = find_available_port(random.randint(3000, 4000))
 
-    if args.use_slime_router:
-        import copy
-
-        from slime.router.router import run_router
-
-        router_args = copy.copy(args)
-        router_args.sglang_router_ip = router_ip
-        router_args.sglang_router_port = router_port
-    else:
-        from sglang_router.launch_router import RouterArgs
+    from sglang_router.launch_router import RouterArgs
 
-        from slime.utils.http_utils import run_router
+    from slime.utils.http_utils import run_router
 
-        router_args = RouterArgs.from_cli_args(args, use_router_prefix=True)
-        router_args.host = router_ip
-        router_args.port = router_port
-        router_args.prometheus_port = find_available_port(random.randint(4000, 5000))
-        router_args.log_level = "warn"
-        router_args.request_timeout_secs = args.sglang_router_request_timeout_secs
+    router_args = RouterArgs.from_cli_args(args, use_router_prefix=True)
+    router_args.host = router_ip
+    router_args.port = router_port
+    router_args.prometheus_port = find_available_port(random.randint(4000, 5000))
+    router_args.log_level = "warn"
+    router_args.request_timeout_secs = args.sglang_router_request_timeout_secs
 
-        if has_pd_disaggregation:
-            router_args.pd_disaggregation = True
-            # Disable circuit breaker to prevent RDMA transfer timeouts from
-            # marking decode workers as dead. Timeouts are transient (PCIe
-            # contention under high load) and do not indicate a dead server.
-            router_args.disable_circuit_breaker = True
+    if has_pd_disaggregation:
+        router_args.pd_disaggregation = True
+        # Disable circuit breaker to prevent RDMA transfer timeouts from
+        # marking decode workers as dead. Timeouts are transient (PCIe
+        # contention under high load) and do not indicate a dead server.
+        router_args.disable_circuit_breaker = True
 
-        # We will not use the health check from router.
-        router_args.disable_health_check = True
+    # We will not use the health check from router.
+    router_args.disable_health_check = True
 
-        logger.info(f"Launch router with args: {router_args}")
+    logger.info(f"Launch router with args: {router_args}")
 
     process = multiprocessing.Process(
         target=run_router,
@@ -972,7 +954,7 @@ def _start_router(args, *, has_pd_disaggregation: bool = False, force_new: bool
     # Wait 3 seconds
     time.sleep(3)
     assert process.is_alive()
-    logger.info(f"Router launched at {router_ip}:{router_port}")
+    logger.info(f"Router launched at {router_ip}:{router_port}, Prometheus port: {router_args.prometheus_port}")
     return router_ip, router_port
 
 

diff --git a/slime/rollout/sglang_rollout.py b/slime/rollout/sglang_rollout.py
@@ -316,7 +316,7 @@ async def abort(args: Namespace, rollout_id: int) -> list[list[Sample]]:
     assert not state.aborted
     state.aborted = True
 
-    if parse(sglang_router.__version__) <= parse("0.2.1") or args.use_slime_router:
+    if parse(sglang_router.__version__) <= parse("0.2.1"):
         response = await get(f"http://{args.sglang_router_ip}:{args.sglang_router_port}/list_workers")
         urls = response["urls"]
     else:

diff --git a/slime/router/__init__.py b/slime/router/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -135,7 +135,6 @@ SGLANG_ARGS=( @@
        # make every dp rank has 128 concurrency
        --sglang-server-concurrency 1024
-       --use-slime-router
     )
@@ Expand Down @@