Skip to content

Commit bd217a6

Browse files
committed
some minor cleanup
1 parent f2270ad commit bd217a6

6 files changed

Lines changed: 553 additions & 10 deletions

File tree

slime/backends/megatron_utils/actor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -468,7 +468,7 @@ def train_actor(self, rollout_id: int, rollout_data: RolloutBatch) -> None:
468468
compute_advantages_and_returns(self.args, rollout_data)
469469

470470
if self.rollout_data_postprocess is not None:
471-
self.rollout_data_postprocess(self.args)
471+
self.rollout_data_postprocess(self.args, rollout_id, rollout_data)
472472

473473
log_rollout_data(
474474
rollout_id,

slime/backends/sglang_utils/sglang_engine.py

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def _to_local_gpu_id(physical_gpu_id: int) -> int:
5151

5252

5353
def launch_server_process(server_args: ServerArgs) -> multiprocessing.Process:
54-
if hasattr(server_args, "encoder_only") and server_args.encoder_only:
54+
if getattr(server_args, "encoder_only", False):
5555
from sglang.srt.disaggregation.encode_server import launch_server
5656
else:
5757
from sglang.srt.entrypoints.http_server import launch_server
@@ -250,13 +250,12 @@ def health_generate(self, timeout: float = 5.0) -> bool:
250250
if self.node_rank != 0:
251251
return True
252252

253-
url = f"http://{self.server_host}:{self.server_port}/health_generate"
254-
try:
255-
response = requests.get(url, timeout=timeout)
256-
response.raise_for_status()
257-
return True
258-
except requests.RequestException:
259-
raise
253+
response = requests.get(
254+
f"http://{self.server_host}:{self.server_port}/health_generate",
255+
timeout=timeout,
256+
)
257+
response.raise_for_status()
258+
return True
260259

261260
def update_weights_from_tensor(
262261
self,

slime/utils/eval_config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,8 @@ class EvalDatasetConfig:
122122
# If set, eval will use ServerGenerationProxy to generate through AppServer
123123
app_service: str | None = None
124124

125+
eval_task_timeout: int | None = None
126+
125127
metadata_overrides: dict[str, Any] = field(default_factory=dict)
126128

127129
def __post_init__(self) -> None:

slime/utils/wandb_utils.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,13 @@ def reinit_wandb_primary_with_open_metrics(args, router_addr):
8989
"""
9090
if not args.use_wandb or _is_offline_mode(args):
9191
return
92+
if getattr(args, "wandb_mode", None) == "disabled":
93+
return
9294
if router_addr is None:
9395
return
96+
wandb_run_id = getattr(args, "wandb_run_id", None)
97+
if wandb_run_id is None:
98+
return
9499

95100
import sglang_router
96101

@@ -105,7 +110,7 @@ def reinit_wandb_primary_with_open_metrics(args, router_addr):
105110
wandb.finish()
106111

107112
init_kwargs = {
108-
"id": args.wandb_run_id,
113+
"id": wandb_run_id,
109114
"entity": args.wandb_team,
110115
"project": args.wandb_project,
111116
"resume": "allow",
@@ -123,6 +128,7 @@ def reinit_wandb_primary_with_open_metrics(args, router_addr):
123128
}
124129

125130
if args.wandb_dir:
131+
os.makedirs(args.wandb_dir, exist_ok=True)
126132
init_kwargs["dir"] = args.wandb_dir
127133

128134
wandb.init(**init_kwargs)

0 commit comments

Comments
 (0)