-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun.py
More file actions
415 lines (351 loc) · 17.6 KB
/
Copy pathrun.py
File metadata and controls
415 lines (351 loc) · 17.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
#!/usr/bin/env python3
"""
AccelMark unified runner entry point.
Usage:
# List all available runners
python run.py --list
# Run a benchmark
python run.py --runner nvidia_vllm_47f5d58e --suite suite_A --scenario all
# Run a benchmark on multiple chips
# Set tensor_parallel_size in configs/runner_configs/runner_nvidia_vllm_47f5d58e.yaml
# or pass --tensor-parallel-size directly (supported by runners that accept it)
python run.py --runner nvidia_vllm_47f5d58e --suite suite_B --scenario all --tensor-parallel-size 4
# Serve — using a suite for model + generation params
python run.py --runner nvidia_vllm_47f5d58e --suite suite_A --serve
# Serve — specifying the model directly (no suite required)
python run.py --runner nvidia_vllm_47f5d58e --model meta-llama/Llama-3.1-8B-Instruct --serve
# Serve — suite as base, override model and tune params
python run.py --runner nvidia_vllm_47f5d58e --suite suite_A --serve \\
--model meta-llama/Llama-3.1-8B-Instruct \\
--max-tokens 4096 --port 8080 --workers 8 --api-key secret
# All flags after --runner <id> are passed through to the runner unchanged (non-serve mode)
python run.py --runner nvidia_vllm_47f5d58e --suite suite_A --scenario offline --output-dir ./my_result
"""
import argparse
import importlib.util
import inspect
import json
import subprocess
import sys
from pathlib import Path
# Repository layout, resolved relative to the directory containing this script.
REPO_ROOT = Path(__file__).parent
RUNNERS_DIR = REPO_ROOT / "runners"
SUITES_DIR = REPO_ROOT / "suites"

# Entries that live flat in runners/ and must never be treated as runner folders.
_BASE_FILES = {
    "__init__.py",
    "__pycache__",
    "benchmark_runner.py",
    "collect_env.py",
    "hash_runner.py",
    "meta.schema.json",
    "protocol.py",
    "validate_runners.py",
    "validate_submission.py",
}
# ── Runner discovery ──────────────────────────────────────────────────────────
def discover_runners() -> dict[str, dict]:
    """
    Return a dict of {runner_id: meta} for all valid runner folders.

    A folder directly under runners/ counts as a runner when it is a
    directory, its name is not in _BASE_FILES, it does not start with ".",
    and it contains a runner.py file. The runner ID is the folder name.

    Runners with missing or unreadable meta.json are included with partial
    data (id, name, platform="unknown" and an explanatory description).
    A meta.json whose top-level JSON value is not an object is treated as
    unreadable, so callers can always use dict methods on the result.

    Returns an empty dict when the runners/ directory does not exist.
    """

    def _placeholder(folder_name: str, reason: str) -> dict:
        # Minimal stand-in metadata for runners without usable meta.json.
        return {"id": folder_name, "name": folder_name, "platform": "unknown",
                "description": reason}

    runners: dict[str, dict] = {}
    if not RUNNERS_DIR.is_dir():
        # Nothing to scan; iterdir() would raise on a missing directory.
        return runners

    for folder in sorted(RUNNERS_DIR.iterdir()):
        if not folder.is_dir():
            continue
        if folder.name in _BASE_FILES or folder.name.startswith("."):
            continue
        if not (folder / "runner.py").exists():
            continue

        meta_path = folder / "meta.json"
        if not meta_path.exists():
            runners[folder.name] = _placeholder(folder.name, "(no meta.json)")
            continue

        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            meta = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            meta = None
        if not isinstance(meta, dict):
            meta = _placeholder(folder.name, "(meta.json unreadable)")
        runners[folder.name] = meta
    return runners
# ── Commands ──────────────────────────────────────────────────────────────────
def cmd_list(args) -> int:
    """
    Print the runners found by discover_runners(), grouped by platform.

    Args:
        args: any object; its optional boolean attribute ``all`` selects
            whether deprecated runners (those whose meta has a truthy
            "deprecated_by") are listed too. Missing attribute means False.

    Returns:
        Always 0, including when there is nothing to list.

    The header reports active and deprecated runners separately, so that
    with ``all`` set the deprecated runners being shown are not counted as
    active. A missing, null or non-string "platform" is listed under
    "other" / its string form instead of breaking sorting.
    """
    show_all = getattr(args, 'all', False)
    runners = discover_runners()
    if not runners:
        print("No runners found in runners/")
        print("See CONTRIBUTING.md to add a runner.")
        return 0

    by_platform: dict[str, list] = {}
    active_count = 0
    deprecated_count = 0
    for rid, meta in runners.items():
        if meta.get("deprecated_by"):
            deprecated_count += 1
            if not show_all:
                continue
        else:
            active_count += 1
        # Normalise so sorted() and .upper() below always see a string.
        platform = str(meta.get("platform") or "other")
        by_platform.setdefault(platform, []).append((rid, meta))

    if not by_platform:
        print("No active runners found.")
        print("Run with --all to include deprecated runners.")
        return 0

    header = f"\nAvailable runners ({active_count} active"
    if deprecated_count:
        if show_all:
            header += f", {deprecated_count} deprecated"
        else:
            header += f", {deprecated_count} deprecated — run --list --all to show"
    header += ")\n"
    print(header)

    for platform in sorted(by_platform):
        print(f" {platform.upper()}")
        for rid, meta in by_platform[platform]:
            deprecated_by = meta.get("deprecated_by")
            supersedes_chain = meta.get("supersedes_chain") or []
            status = ""
            if deprecated_by and show_all:
                status = f" [DEPRECATED → use {deprecated_by}]"
            print(f" {rid}{status}")
            print(f" {meta.get('name', rid)}")
            print(f" {meta.get('description', '')}")
            if supersedes_chain:
                print(f" Replaces: {supersedes_chain[0]}")
            # Prefer the install script; fall back to a requirements file.
            install_sh = RUNNERS_DIR / rid / "install.sh"
            req_path = RUNNERS_DIR / rid / "requirements.txt"
            if install_sh.exists():
                print(f" Install: bash runners/{rid}/install.sh")
            elif req_path.exists():
                print(f" Install: pip install -r runners/{rid}/requirements.txt")
            print()
    return 0
def cmd_run(runner_id: str, runner_args: list[str]) -> int:
    """
    Run runners/<runner_id>/runner.py in a subprocess and return its exit code.

    Args:
        runner_id: name of the runner folder under runners/.
        runner_args: command-line arguments forwarded unchanged to runner.py.

    Returns:
        1 if the runner folder or its runner.py is missing (after printing an
        error), otherwise the exit status of the runner subprocess.

    Before delegating, a short banner is printed from meta.json when that
    file exists. The banner is best-effort: an unreadable or malformed
    meta.json produces a warning on stderr and the run still proceeds.
    """
    runner_dir = RUNNERS_DIR / runner_id
    runner_py = runner_dir / "runner.py"

    # ── Existence check ───────────────────────────────────────────────────────
    if not runner_dir.exists():
        print(f"Error: runner '{runner_id}' not found in runners/")
        print()
        print("Available runners:")
        if RUNNERS_DIR.is_dir():
            # Same folder filter as discover_runners(), including dot-folders.
            available = sorted(
                entry.name for entry in RUNNERS_DIR.iterdir()
                if entry.is_dir()
                and entry.name not in _BASE_FILES
                and not entry.name.startswith(".")
                and (entry / "runner.py").exists()
            )
        else:
            # runners/ itself is missing; iterdir() would raise.
            available = []
        for rid in available:
            print(f" {rid}")
        return 1
    if not runner_py.exists():
        print(f"Error: runners/{runner_id}/runner.py does not exist")
        return 1

    # ── Load and show meta ────────────────────────────────────────────────────
    meta_path = runner_dir / "meta.json"
    if meta_path.exists():
        meta = None
        try:
            meta = json.loads(meta_path.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: could not read runners/{runner_id}/meta.json: {exc}",
                  file=sys.stderr)
        else:
            if not isinstance(meta, dict):
                print(f"Warning: runners/{runner_id}/meta.json is not a JSON object",
                      file=sys.stderr)
                meta = None
        if meta is not None:
            deprecated_by = meta.get("deprecated_by")
            if deprecated_by:
                print(f"Warning: '{runner_id}' has been superseded by '{deprecated_by}'.")
                print(" Consider using the newer runner instead:")
                print(f" python run.py --runner {deprecated_by} ...")
                print()
            print(f"Runner: {meta.get('name', runner_id)}")
            print(f"ID: {runner_id}")
            print(f"By: {meta.get('submitted_by', '—')}")
            supersedes_chain = meta.get("supersedes_chain") or []
            # Only index into a real sequence so odd metadata cannot abort the run.
            if isinstance(supersedes_chain, (list, tuple)) and supersedes_chain:
                print(f"Replaces: {supersedes_chain[0]}")
            print()

    # ── Delegate to runner.py ─────────────────────────────────────────────────
    cmd = [sys.executable, str(runner_py)] + runner_args
    return subprocess.call(cmd, cwd=str(REPO_ROOT))
_SERVE_DEFAULT_MAX_TOKENS = 2048
def cmd_serve(
runner_id: str,
suite_id: str | None,
model_id: str | None,
model_path: str | None,
max_tokens: int,
max_model_len: int | None,
port: int,
host: str,
workers: int,
api_key: str | None,
) -> int:
"""
Launch the OpenAI-compatible inference server for the given runner.
Model and generation params can come from a suite file (--suite) or be
specified directly (--model, --max-tokens, --max-model-len). If both
--suite and --model are given, --model overrides the suite's model_id.
Explicit --max-tokens / --max-model-len always override suite values.
"""
runner_dir = RUNNERS_DIR / runner_id
runner_py = runner_dir / "runner.py"
# ── Validate runner ───────────────────────────────────────────────────────
if not runner_dir.exists():
print(f"Error: runner '{runner_id}' not found in runners/")
return 1
if not runner_py.exists():
print(f"Error: runners/{runner_id}/runner.py does not exist")
return 1
# ── Build suite dict ──────────────────────────────────────────────────────
if suite_id:
suite_path = SUITES_DIR / suite_id / "suite.json"
if not suite_path.exists():
print(f"Error: suite '{suite_id}' not found at {suite_path}")
return 1
try:
suite = json.loads(suite_path.read_text())
except Exception as e:
print(f"Error reading suite.json: {e}")
return 1
# --model overrides the suite's model_id if provided
effective_model_id = model_id or suite.get("model_id")
if not effective_model_id:
print(f"Error: suite '{suite_id}' has no 'model_id' field")
return 1
# Explicit flags override suite values
suite["output_tokens_max"] = max_tokens or suite.get("output_tokens_max",
_SERVE_DEFAULT_MAX_TOKENS)
if max_model_len is not None:
suite["max_model_len"] = max_model_len
else:
# No suite — --model is required
if not model_id:
print("Error: either --suite or --model is required for --serve")
return 1
effective_model_id = model_id
suite = {
"model_id": effective_model_id,
"output_tokens_max": max_tokens,
}
if max_model_len is not None:
suite["max_model_len"] = max_model_len
# ── Import runner class ───────────────────────────────────────────────────
sys.path.insert(0, str(REPO_ROOT))
spec = importlib.util.spec_from_file_location("runner_module", str(runner_py))
mod = importlib.util.module_from_spec(spec)
try:
spec.loader.exec_module(mod)
except Exception as e:
print(f"Error importing runners/{runner_id}/runner.py: {e}")
return 1
from runners.benchmark_runner import BenchmarkRunner
runner_class = None
for _, cls in inspect.getmembers(mod, inspect.isclass):
if (
cls is not BenchmarkRunner
and issubclass(cls, BenchmarkRunner)
and cls.__module__ == "runner_module"
):
runner_class = cls
break
if runner_class is None:
print(f"Error: no BenchmarkRunner subclass found in runners/{runner_id}/runner.py")
return 1
# ── Instantiate and configure runner ─────────────────────────────────────
runner = runner_class()
# Signal to load_model() that we need the async engine (streaming path)
runner._current_scenario = "online"
# ── Resolve model path ────────────────────────────────────────────────────
effective_model_path = runner._resolve_model_path(effective_model_id, model_path)
# ── Load model ────────────────────────────────────────────────────────────
print(f"Runner: {runner_class.__name__} ({runner_id})")
if suite_id:
print(f"Suite: {suite_id}")
print(f"Model: {effective_model_id}")
print(f"Path: {effective_model_path}")
print(f"Params: max_tokens={suite['output_tokens_max']}"
+ (f" max_model_len={suite['max_model_len']}" if suite.get("max_model_len") else ""))
print()
try:
# Serve mode bypasses parse_args(), so load runner config directly here.
# This populates self._runner_config so load_model() can read named fields
# and engine_kwargs exactly as it does in benchmark mode.
_serve_cfg = runner._load_runner_config(suite_id)
runner._runner_config = _serve_cfg
_tp = _serve_cfg.get("tensor_parallel_size", 1)
runner.load_model(effective_model_path, suite, {
"tensor_parallel_size": _tp,
"pipeline_parallel_size": 1,
"expert_parallel_size": 1,
"data_parallel_size": 1,
})
except Exception as e:
print(f"Error loading model: {e}")
return 1
# ── Start serve ───────────────────────────────────────────────────────────
from serve.server import start_server
start_server(
runner=runner,
model_id=effective_model_id,
port=port,
host=host,
workers=workers,
api_key=api_key,
)
return 0
# ── Serve argument parser ─────────────────────────────────────────────────────
def _parse_serve_args(runner_args: list[str]) -> argparse.Namespace:
"""Parse serve-specific flags from the runner args list."""
parser = argparse.ArgumentParser(
prog="run.py --runner <id>",
description="AccelMark serve mode",
add_help=False,
)
parser.add_argument("--suite", default=None,
help="Suite ID (e.g. suite_A) — defines model and generation params. "
"Optional if --model is given.")
parser.add_argument("--model", default=None, dest="model_id",
help="HuggingFace model ID or name (required if --suite not given; "
"overrides suite model_id if both are given)")
parser.add_argument("--model-path", default=None, dest="model_path",
help="Local path to model weights (overrides HF download)")
parser.add_argument("--max-tokens", type=int, default=_SERVE_DEFAULT_MAX_TOKENS,
dest="max_tokens",
help=f"Max output tokens per request (default: {_SERVE_DEFAULT_MAX_TOKENS})")
parser.add_argument("--max-model-len", type=int, default=None, dest="max_model_len",
help="Max model context length — leave unset to let the framework decide")
parser.add_argument("--port", type=int, default=8000,
help="HTTP port to listen on (default: 8000)")
parser.add_argument("--host", default="0.0.0.0",
help="Bind address (default: 0.0.0.0)")
parser.add_argument("--workers", type=int, default=4,
help="Max concurrent in-flight requests (default: 4)")
parser.add_argument("--api-key", default=None, dest="api_key",
help="If set, all endpoints require Authorization: Bearer <key>")
parser.add_argument("--serve", action="store_true") # consumed here
args, unknown = parser.parse_known_args(runner_args)
if unknown:
print(f"Warning: unrecognised serve flags ignored: {' '.join(unknown)}")
return args
# ── Entry point ───────────────────────────────────────────────────────────────
def main() -> int:
    """
    Command-line entry point; returns the process exit code.

    Dispatch, based on ``sys.argv[1:]``:
      * no arguments, or ``--list`` anywhere: list runners (``--all`` also
        shows deprecated ones);
      * ``--runner <id>`` or ``--runner=<id>``: everything after the runner
        ID goes to serve mode when it contains ``--serve``, otherwise it is
        forwarded unchanged to the runner via cmd_run;
      * anything else: print usage and return 1.

    A missing runner ID, or a flag in the place of the runner ID (for
    example ``--runner --serve``), is reported as an error and returns 1.
    """
    argv = sys.argv[1:]
    if not argv:
        return cmd_list(argparse.Namespace(all=False))
    if "--list" in argv:
        return cmd_list(argparse.Namespace(all="--all" in argv))

    # Locate the first --runner token in either "--runner ID" or "--runner=ID" form.
    runner_idx = next(
        (idx for idx, token in enumerate(argv)
         if token == "--runner" or token.startswith("--runner=")),
        None,
    )
    if runner_idx is None:
        print("Usage:")
        print(" python run.py --list [--all]")
        print(" python run.py --runner <id> --suite <suite> --scenario <scenario> [...]")
        print(" python run.py --runner <id> --serve --suite <suite> [--model <id>] [--model-path <path>]")
        print(" python run.py --runner <id> --serve --model <model_id>")
        print(" Serve options: --port N --host H --workers N --max-tokens N --max-model-len N --api-key K")
        return 1

    if argv[runner_idx] == "--runner":
        runner_id = argv[runner_idx + 1] if runner_idx + 1 < len(argv) else ""
        runner_args = argv[runner_idx + 2:]
    else:
        runner_id = argv[runner_idx].partition("=")[2]
        runner_args = argv[runner_idx + 1:]

    # An empty value or another flag means the ID itself was omitted.
    if not runner_id or runner_id.startswith("-"):
        print("Error: --runner requires a runner ID")
        print("Run 'python run.py --list' to see available runners.")
        return 1

    # ── Serve mode ────────────────────────────────────────────────────────────
    if "--serve" in runner_args:
        args = _parse_serve_args(runner_args)
        return cmd_serve(
            runner_id=runner_id,
            suite_id=args.suite,
            model_id=args.model_id,
            model_path=args.model_path,
            max_tokens=args.max_tokens,
            max_model_len=args.max_model_len,
            port=args.port,
            host=args.host,
            workers=args.workers,
            api_key=args.api_key,
        )
    return cmd_run(runner_id, runner_args)


if __name__ == "__main__":
    sys.exit(main())