Skip to content

Commit 751d03c

Browse files
committed
[API] Reorg core component constructors
1 parent 683fc75 commit 751d03c

19 files changed

Lines changed: 985 additions & 789 deletions

README.md

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,27 @@ For a full walkthrough including data setup, see the [Getting Started guide](htt
5252
from fractions import Fraction
5353

5454
from openg2g.coordinator import Coordinator
55-
from openg2g.datacenter.config import DatacenterConfig, WorkloadConfig
56-
from openg2g.datacenter.offline import OfflineDatacenter, PowerTraceStore
55+
from openg2g.datacenter.config import DatacenterConfig
56+
from openg2g.datacenter.offline import (
57+
OfflineDatacenter, OfflineInferenceData, OfflineWorkload, PowerTraceStore,
58+
)
5759
from openg2g.grid.opendss import OpenDSSGrid
5860
from openg2g.controller.noop import NoopController
5961
from openg2g.models.spec import LLMInferenceModelSpec, LLMInferenceWorkload
6062
from openg2g.types import TapPosition
6163

6264
# 1. Set up a trace-based datacenter
63-
workload = WorkloadConfig(
64-
inference=LLMInferenceWorkload(models=(
65-
LLMInferenceModelSpec("Llama-3.1-8B", num_replicas=720, gpus_per_replica=1, initial_batch_size=128),
66-
LLMInferenceModelSpec("Llama-3.1-70B", num_replicas=180, gpus_per_replica=4, initial_batch_size=128),
67-
)),
68-
)
65+
models = LLMInferenceWorkload(models=(
66+
LLMInferenceModelSpec("Llama-3.1-8B", num_replicas=720, gpus_per_replica=1, initial_batch_size=128),
67+
LLMInferenceModelSpec("Llama-3.1-70B", num_replicas=180, gpus_per_replica=4, initial_batch_size=128),
68+
))
6969
store = PowerTraceStore.load("data/generated/traces_summary.csv")
7070
templates = store.build_templates(duration_s=3600, dt_s=Fraction(1, 10))
71-
dc = OfflineDatacenter(DatacenterConfig(), workload, template_store=templates, dt_s=Fraction(1, 10))
71+
dc = OfflineDatacenter(
72+
DatacenterConfig(),
73+
OfflineWorkload(inference_data=OfflineInferenceData(models, power_templates=templates)),
74+
dt_s=Fraction(1, 10),
75+
)
7276

7377
# 2. Set up the grid
7478
TAP_STEP = 0.00625
@@ -94,11 +98,7 @@ coord = Coordinator(
9498
log = coord.run()
9599
```
96100

97-
See [`examples/`](examples/) for complete simulation scripts:
98-
99-
- `run_baseline.py --mode no-tap`: fixed taps, no OFO
100-
- `run_baseline.py --mode tap-change`: scheduled tap changes, no OFO
101-
- `run_ofo.py`: OFO closed-loop batch size control
101+
See [`examples/`](examples/) for complete simulation scripts (offline trace-replay and online hardware-in-the-loop variants).
102102

103103
## Running Example Simulations
104104

@@ -127,20 +127,23 @@ python data/offline/generate_training_trace.py \
127127
# Baseline: fixed taps
128128
python examples/offline/run_baseline.py --mode no-tap \
129129
--data-dir data/generated \
130-
--training-trace data/generated/synthetic_training_trace.csv
130+
--training-trace data/generated/synthetic_training_trace.csv \
131+
--ieee-case-dir examples/ieee13
131132

132133
# Baseline: scheduled tap changes
133134
python examples/offline/run_baseline.py --mode tap-change \
134135
--data-dir data/generated \
135-
--training-trace data/generated/synthetic_training_trace.csv
136+
--training-trace data/generated/synthetic_training_trace.csv \
137+
--ieee-case-dir examples/ieee13
136138

137139
# OFO closed-loop control
138140
python examples/offline/run_ofo.py \
139141
--data-dir data/generated \
140-
--training-trace data/generated/synthetic_training_trace.csv
142+
--training-trace data/generated/synthetic_training_trace.csv \
143+
--ieee-case-dir examples/ieee13
141144
```
142145

143-
`--data-dir` and `--training-trace` are required for all simulation drivers.
146+
`--data-dir`, `--training-trace`, and `--ieee-case-dir` are required for all offline simulation drivers.
144147

145148
## Documentation
146149

data/offline/generate_training_trace.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
"""Generate a synthetic GPU training-like power trace.
22
3-
Extracted from baseline_wo_control.py. Produces a CSV with columns
4-
``t_s`` and ``power_W`` that can be used as a training overlay in
5-
the simulation.
3+
Produces a CSV with columns ``t_s`` and ``power_W`` that can be used as a
4+
training overlay in the simulation.
65
"""
76

87
from __future__ import annotations

docs/getting-started/quickstart.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,18 @@ These three runs correspond to the evaluation cases in the [GPU-to-Grid paper](h
4848
```bash
4949
python examples/offline/run_baseline.py --mode no-tap \
5050
--data-dir data/generated \
51-
--training-trace data/generated/synthetic_training_trace.csv
51+
--training-trace data/generated/synthetic_training_trace.csv \
52+
--ieee-case-dir examples/ieee13
5253
5354
python examples/offline/run_baseline.py --mode tap-change \
5455
--data-dir data/generated \
55-
--training-trace data/generated/synthetic_training_trace.csv
56+
--training-trace data/generated/synthetic_training_trace.csv \
57+
--ieee-case-dir examples/ieee13
5658
5759
python examples/offline/run_ofo.py \
5860
--data-dir data/generated \
59-
--training-trace data/generated/synthetic_training_trace.csv
61+
--training-trace data/generated/synthetic_training_trace.csv \
62+
--ieee-case-dir examples/ieee13
6063
```
6164

6265
Outputs (plots and logs) are saved to `outputs/baseline_no-tap/`, `outputs/baseline_tap-change/`, and `outputs/ofo/`.

docs/guide/architecture.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,12 @@ __init__() ──> reset() ──> start() ──> step() / apply_control() ─
152152
This is mainly to allow reusing component objects across multiple [`Coordinator.run()`][openg2g.coordinator.Coordinator.run] calls with different configurations without having to re-instantiate all of them every time:
153153

154154
```python
155-
grid = OpenDSSGrid(...) # stores config only
156-
ctrl = OFOBatchSizeController(...) # stores fits + tuning
157-
for config in datacenter_configs:
158-
dc = OfflineDatacenter(**config) # builds power templates
155+
grid = OpenDSSGrid(...) # stores config only
156+
ctrl = OFOBatchSizeController(...) # stores fits + tuning
157+
for workload in workloads:
158+
dc = OfflineDatacenter(dc_config, workload, dt_s=dt) # builds power templates
159159
coord = Coordinator(dc, grid, [ctrl], total_duration_s=3600, dc_bus="671")
160-
log = coord.run() # reset -> start -> loop -> stop
160+
log = coord.run() # reset -> start -> loop -> stop
161161
```
162162

163163

docs/guide/building-simulators.md

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,43 +26,46 @@ The sections below show how to set up each component.
2626
### Datacenter
2727

2828
The [`OfflineDatacenter`][openg2g.datacenter.offline.OfflineDatacenter] replays GPU power traces built from the [data pipeline](data-pipeline.md).
29-
Load traces from the generated manifest, build templates for the simulation config, then create the datacenter with a [`DatacenterConfig`][openg2g.datacenter.config.DatacenterConfig] and [`WorkloadConfig`][openg2g.datacenter.config.WorkloadConfig]:
29+
Load traces from the generated manifest, build templates for the simulation config, then create the datacenter with a [`DatacenterConfig`][openg2g.datacenter.config.DatacenterConfig] and [`OfflineWorkload`][openg2g.datacenter.offline.OfflineWorkload]:
3030

3131
```python
3232
from fractions import Fraction
3333

34-
from openg2g.datacenter.config import DatacenterConfig, WorkloadConfig
35-
from openg2g.datacenter.offline import OfflineDatacenter, PowerTraceStore
34+
from openg2g.datacenter.config import DatacenterConfig, PowerAugmentationConfig, ServerRamp, TrainingRun
35+
from openg2g.datacenter.offline import OfflineDatacenter, OfflineInferenceData, OfflineWorkload, PowerTraceStore
3636
from openg2g.datacenter.training_overlay import TrainingTrace
3737
from openg2g.models.spec import LLMInferenceModelSpec, LLMInferenceWorkload
38-
from openg2g.types import ServerRamp, TrainingRun
3938

4039
store = PowerTraceStore.load("data/generated/traces_summary.csv")
4140
templates = store.build_templates(duration_s=3600.0, dt_s=Fraction(1, 10))
4241

4342
training_trace = TrainingTrace.load("data/generated/synthetic_training_trace.csv")
4443

45-
workload = WorkloadConfig(
46-
inference=LLMInferenceWorkload(models=(
47-
LLMInferenceModelSpec(
48-
model_label="Llama-3.1-8B", num_replicas=720, gpus_per_replica=1, initial_batch_size=128,
49-
),
50-
LLMInferenceModelSpec(
51-
model_label="Llama-3.1-70B", num_replicas=180, gpus_per_replica=4, initial_batch_size=128,
52-
),
53-
)),
44+
workload = OfflineWorkload(
45+
inference_data=OfflineInferenceData(
46+
LLMInferenceWorkload(models=(
47+
LLMInferenceModelSpec(
48+
model_label="Llama-3.1-8B", num_replicas=720, gpus_per_replica=1, initial_batch_size=128,
49+
),
50+
LLMInferenceModelSpec(
51+
model_label="Llama-3.1-70B", num_replicas=180, gpus_per_replica=4, initial_batch_size=128,
52+
),
53+
)),
54+
power_templates=templates,
55+
),
5456
server_ramps=ServerRamp(t_start=2500.0, t_end=3000.0, target=0.2),
5557
training=TrainingRun(t_start=1000.0, t_end=2000.0, n_gpus=2400, trace=training_trace),
5658
)
5759

5860
dc = OfflineDatacenter(
59-
datacenter=DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0),
60-
workload=workload,
61-
template_store=templates,
61+
DatacenterConfig(gpus_per_server=8, base_kw_per_phase=500.0),
62+
workload,
6263
dt_s=Fraction(1, 10),
6364
seed=0,
64-
amplitude_scale_range=(0.98, 1.02),
65-
noise_fraction=0.005,
65+
power_augmentation=PowerAugmentationConfig(
66+
amplitude_scale_range=(0.98, 1.02),
67+
noise_fraction=0.005,
68+
),
6669
)
6770
```
6871

docs/guide/data-pipeline.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,13 @@ At simulation time, the generated artifacts are consumed by two components:
130130
```bash
131131
python examples/offline/run_baseline.py --mode no-tap \
132132
--data-dir data/generated \
133-
--training-trace data/generated/synthetic_training_trace.csv
133+
--training-trace data/generated/synthetic_training_trace.csv \
134+
--ieee-case-dir examples/ieee13
134135

135136
python examples/offline/run_ofo.py \
136137
--data-dir data/generated \
137-
--training-trace data/generated/synthetic_training_trace.csv
138+
--training-trace data/generated/synthetic_training_trace.csv \
139+
--ieee-case-dir examples/ieee13
138140
```
139141

140-
`--data-dir` and `--training-trace` are required for all simulation drivers.
142+
`--data-dir`, `--training-trace`, and `--ieee-case-dir` are required for all offline simulation drivers.

examples/offline/plotting.py

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
"""Plotting functions and data-loading helpers for openg2g simulation results.
22
3-
Faithfully ports the matplotlib code from ``baseline_wo_control.py`` and
4-
``final_ofo_test.py`` (the G2G paper reference scripts), adapted to read data
5-
from the library's ``SimulationLog`` and ``LLMDatacenterState`` objects.
3+
Reproduces the figures from the G2G paper, reading data from the library's
4+
``SimulationLog`` and ``LLMDatacenterState`` objects.
65
76
This module lives outside the ``openg2g`` library on purpose: the library
87
exports simulation state and metrics, while all matplotlib-dependent
@@ -32,8 +31,7 @@
3231
Figure = matplotlib.figure.Figure
3332

3433
# ── Bus color map (IEEE 13-bus, tab20-based) ─────────────────────────
35-
# Deterministic colors from baseline_wo_control.py so all voltage plots
36-
# use consistent bus coloring.
34+
# Deterministic colors so all voltage plots use consistent bus coloring.
3735

3836
BUS_COLOR_MAP: dict[str, Any] = {
3937
"611": (0.1216, 0.4667, 0.7059, 1.0),
@@ -121,7 +119,6 @@ def _bus_sort_key(b: str) -> tuple[int, str]:
121119

122120
# ══════════════════════════════════════════════════════════════════════
123121
# Paper Fig. 5: 2-panel, 3-phase power (MW) + per-model average ITL
124-
# (ported from baseline_wo_control.py plot_power_latency_subfigs_compact)
125122
# ══════════════════════════════════════════════════════════════════════
126123

127124

@@ -237,7 +234,6 @@ def plot_power_and_itl_2panel(
237234

238235
# ══════════════════════════════════════════════════════════════════════
239236
# Paper Fig. 6 / Fig. 7: Per-phase all-bus voltages with bus colormap
240-
# (ported from both baseline_wo_control.py and final_ofo_test.py)
241237
# ══════════════════════════════════════════════════════════════════════
242238

243239

@@ -415,7 +411,6 @@ def plot_allbus_voltages_per_phase(
415411

416412
# ══════════════════════════════════════════════════════════════════════
417413
# Paper Fig. 8: 4-panel, batch, power/replica, ITL, throughput (OFO)
418-
# (ported from final_ofo_test.py plot_model_timeseries_4panel_compact)
419414
# ══════════════════════════════════════════════════════════════════════
420415

421416

@@ -610,7 +605,7 @@ def _apply_overlays(ax: Axes) -> None:
610605

611606

612607
# ══════════════════════════════════════════════════════════════════════
613-
# Standalone plots (nice-to-have, ported from final_ofo_test.py)
608+
# Standalone plots
614609
# ══════════════════════════════════════════════════════════════════════
615610

616611

@@ -943,28 +938,3 @@ def _draw_schedule_lines(ax: Axes) -> None:
943938
fig.savefig(save_path, bbox_inches="tight")
944939
plt.close(fig)
945940
return fig
946-
947-
948-
# ══════════════════════════════════════════════════════════════════════
949-
# Shared data-loading helpers
950-
# ══════════════════════════════════════════════════════════════════════
951-
952-
953-
def load_itl_fits_from_csv(csv_path: Path | str) -> dict[str, dict[int, Any]]:
954-
"""Load ITL mixture fits from a CSV.
955-
956-
Returns:
957-
{model_label: {batch_size: ITLMixtureModel}}.
958-
"""
959-
import pandas as pd
960-
from mlenergy_data.modeling import ITLMixtureModel
961-
962-
df = pd.read_csv(csv_path)
963-
result: dict[str, dict[int, Any]] = {}
964-
for row in df.to_dict(orient="records"):
965-
model = str(row["model_label"]).strip()
966-
batch = int(row["max_num_seqs"])
967-
result.setdefault(model, {})[batch] = ITLMixtureModel.from_dict(row)
968-
if not result:
969-
raise ValueError(f"No ITL mixture rows loaded from {csv_path}")
970-
return result

0 commit comments

Comments
 (0)