Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ torchrun --nproc-per-node 4 benchmark_sharded_attention.py \
| `--dtype` | `float32` | `float32`, `float16`, or `bfloat16` |
| `--num_warmup` | 5 | Warmup iterations |
| `--num_iterations` | 10 | Timed iterations |
| `--output_file` | — | Path to write JSON results |
| `--results_dir` | `<script_dir>/results/` | Directory for the auto-named JSON output |
| `--print-only` | off | Skip writing JSON; print to stdout only |

By default the benchmark writes a JSON file to `results/` whose name
encodes the run configuration, e.g.
`single_gpu_inference_float32_seq4096.json` or
`distributed_4gpu_train_bfloat16_seq8192.json`. Pass `--print-only` to
disable this.

## Plotting results

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import argparse
import json
from datetime import datetime
from pathlib import Path

import numpy as np
import torch
Expand All @@ -36,14 +37,22 @@

from physicsnemo.utils import Profiler

# Benchmark JSON results are written, by default, to a ``results`` directory
# located next to this script. Output filenames follow the pattern
#     <topology>_<mode>_<dtype>_seq<seq_len>.json
# with ``<topology>`` being ``single_gpu`` or ``distributed_<N>gpu``; this is
# the layout matched by the regex in ``plot_scaling_results.py``.
_SCRIPT_DIR = Path(__file__).resolve().parents[0]
DEFAULT_RESULTS_DIR = _SCRIPT_DIR.joinpath("results")


def parse_args():
"""Parse command-line arguments for the attention benchmark.

Returns:
argparse.Namespace: Parsed arguments including seq_len, num_heads,
head_dim, batch_size, warmup/iteration counts, dtype, benchmark
mode (inference or train), and an optional output file path.
mode (inference or train), the results directory, and a
``--print-only`` flag that disables JSON output.
"""
parser = argparse.ArgumentParser(
description="Benchmark scaled_dot_product_attention: single GPU vs ShardTensor"
Expand Down Expand Up @@ -85,14 +94,37 @@ def parse_args():
help="Benchmark mode: 'inference' (forward only) or 'train' (forward + backward)",
)
parser.add_argument(
"--output_file",
"--results_dir",
type=str,
default=None,
help="Path to write JSON results file. If not set, results are only printed.",
default=str(DEFAULT_RESULTS_DIR),
help=(
"Directory in which to write the JSON results file. "
"The filename is auto-generated to match the format expected by "
"plot_scaling_results.py. Ignored when --print-only is set."
),
)
parser.add_argument(
"--print-only",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tiny consistency nitpick: every other flag in this file is underscore-cased (--results_dir, ...), and --print-only is the only one that is hyphen-based. Would it make sense to use --print_only instead?

dest="print_only",
action="store_true",
help="Print results to stdout only; do not write a JSON file.",
)
return parser.parse_args()


def build_output_filename(
    *, distributed: bool, world_size: int, mode: str, dtype: str, seq_len: int
) -> str:
    """Return the JSON results filename for one benchmark configuration.

    The name has the form ``<topology>_<mode>_<dtype>_seq<seq_len>.json``,
    which is the layout ``plot_scaling_results.py`` expects.

    Args:
        distributed: Selects the topology prefix. When true the prefix is
            ``distributed_<N>gpu``; otherwise it is ``single_gpu``.
        world_size: Value substituted for ``<N>`` in the distributed prefix.
            Ignored when ``distributed`` is false.
        mode: Benchmark mode string placed verbatim in the name.
        dtype: Dtype name placed verbatim in the name.
        seq_len: Sequence length, emitted as ``seq<seq_len>``.

    Returns:
        The bare filename (no directory component), ending in ``.json``.
    """
    if distributed:
        prefix = f"distributed_{world_size}gpu"
    else:
        prefix = "single_gpu"
    stem = f"{prefix}_{mode}_{dtype}_seq{seq_len}"
    return f"{stem}.json"


DTYPE_MAP = {
"float32": torch.float32,
"float16": torch.float16,
Expand Down Expand Up @@ -317,10 +349,20 @@ def main():
f"Max peak allocated (across {dm.world_size} ranks): {max_peak_allocated_across_ranks / mb:.2f} MB"
)

if args.output_file:
with open(args.output_file, "w") as f:
if not args.print_only:
results_dir = Path(args.results_dir).expanduser()
results_dir.mkdir(parents=True, exist_ok=True)
fname = build_output_filename(
distributed=distributed,
world_size=dm.world_size,
mode=args.mode,
dtype=args.dtype,
seq_len=S,
)
output_path = results_dir / fname
with open(output_path, "w") as f:
json.dump(results, f, indent=2)
print(f"Results saved to {args.output_file}")
print(f"Results saved to {output_path}")


if __name__ == "__main__":
Expand Down
Loading