From f0c9d3cc1f9f9a9ed98723e9462f8e1a3465a428 Mon Sep 17 00:00:00 2001 From: David <12414531+DavidBellamy@users.noreply.github.com> Date: Thu, 16 Apr 2026 07:15:09 -0700 Subject: [PATCH] feat(sglang_engine): allow PD worker_type on /add_worker registration path The old sglang_router (<=0.2.1) and the miles-router both use the single-arg /add_worker?url=... endpoint for engine registration. Previously, the Miles engine asserted worker_type=='regular' before hitting that endpoint, so any attempt to stand up prefill/decode workers via the miles-router path (including the sgl-model-gateway that mirrors it) fails fast at engine init: AssertionError: pd disaggregation is not supported in old router or miles router. This blocks PD disaggregation throughput scaling in any deployment that uses the miles-router path, even when the receiving router (e.g. sgl-model-gateway with a PD-aware shim) can handle worker_type on /add_worker. Relax the assertion: forward worker_type (and bootstrap_port for prefill) as extra query params. Routers that honor them get PD registration; routers that only accept the single-arg form ignore the extras and register as regular. The engine logs a warning whenever a non-regular worker is registered through this path, so the possible fallback is visible. The companion server-side change is on the receiving router: - sgl-model-gateway must accept ?worker_type=&bootstrap_port= on /add_worker - Or deployments can use the newer /workers endpoint (non-miles path). Context: LLM360/RL360 #76. Track G (job 1559336) showed full PD KV transfer via mooncake works with SGLang's own mini_lb; this unblocks the same flow through Miles-driven rollouts. 
--- miles/backends/sglang_utils/sglang_engine.py | 32 +++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/miles/backends/sglang_utils/sglang_engine.py b/miles/backends/sglang_utils/sglang_engine.py index 8b567a744d..6650930a20 100644 --- a/miles/backends/sglang_utils/sglang_engine.py +++ b/miles/backends/sglang_utils/sglang_engine.py @@ -216,12 +216,34 @@ def _init_normal(self, server_args_dict): if self.node_rank == 0 and self.router_ip and self.router_port: if parse(sglang_router.__version__) <= parse("0.2.1") or self.args.use_miles_router: - assert ( - self.worker_type == "regular" - ), "pd disaggregation is not supported in old router or miles router." - response = requests.post( - f"http://{self.router_ip}:{self.router_port}/add_worker?url=http://{self.server_host}:{self.server_port}" + # Old sglang_router (<=0.2.1) and miles-router use the single-arg + # /add_worker?url=... endpoint. For PD disaggregation, forward + # worker_type (and bootstrap_port for prefill) as extra query + # params so a router that supports PD via /add_worker can act on + # them. Routers that only understand the regular form will see + # the extra params, ignore them, and register the worker as + # regular -- so PD routing through such a router still needs a + # server-side update. This at least removes the unconditional + # assert that would fail-fast before the request is ever sent. + add_worker_url = ( + f"http://{self.router_ip}:{self.router_port}/add_worker" + f"?url=http://{self.server_host}:{self.server_port}" ) + if self.worker_type != "regular": + add_worker_url += f"&worker_type={self.worker_type}" + if self.worker_type == "prefill": + bootstrap_port = server_args_dict.get("disaggregation_bootstrap_port") + if bootstrap_port is not None: + add_worker_url += f"&bootstrap_port={bootstrap_port}" + logger.warning( + "Registering a '%s' worker via /add_worker on the " + "old-style router path. 
PD disaggregation requires the " + "router to honor worker_type on this endpoint; if it " + "only accepts the single-arg form, workers will be " + "treated as regular and PD routing will not function.", + self.worker_type, + ) + response = requests.post(add_worker_url) else: payload = { "url": f"http://{self.server_host}:{self.server_port}",