diff --git a/docs/guide/data-pipeline.md b/docs/guide/data-pipeline.md index a064649..a058300 100644 --- a/docs/guide/data-pipeline.md +++ b/docs/guide/data-pipeline.md @@ -87,7 +87,7 @@ For each model and batch size, it: 1. Extracts power timelines from benchmark runs 2. Resamples to a median-duration grid -3. Fits [`ITLMixtureModel`][mlenergy_data.modeling.ITLMixtureModel] distributions per batch size +3. Fits [`ITLMixtureModel`][mlenergy.data.modeling.ITLMixtureModel] distributions per batch size ``` ML.ENERGY Benchmark Dataset mlenergy-data @@ -129,20 +129,20 @@ $$p(x) = \frac{P_{\max}}{1 + \exp(-k_p(x - x_{0,p}))} + p_0, \quad x \triangleq where $P_{\max}$ is the saturation magnitude, $k_p$ controls transition sharpness, $x_{0,p}$ is the characteristic batch size threshold, and $p_0$ is an offset term. Latency and throughput use the same functional form with their own parameters. -OpenG2G uses [`LogisticModel`][mlenergy_data.modeling.LogisticModel] from [`mlenergy-data`](https://ml.energy/data) at both stages: +OpenG2G uses [`LogisticModel`][mlenergy.data.modeling.LogisticModel] from [`mlenergy-data`](https://ml.energy/data) at both stages: -- **Generation**: [`LogisticModel.fit(x, y)`][mlenergy_data.modeling.logistic.LogisticModel.fit] fits the curve to benchmark data -- **Runtime**: [`LogisticModel.eval(batch)`][mlenergy_data.modeling.logistic.LogisticModel.eval] evaluates the curve, and [`LogisticModel.deriv_wrt_x(x)`][mlenergy_data.modeling.logistic.LogisticModel.deriv_wrt_x] computes gradients for the OFO controller +- **Generation**: [`LogisticModel.fit(x, y)`][mlenergy.data.modeling.logistic.LogisticModel.fit] fits the curve to benchmark data +- **Runtime**: [`LogisticModel.eval(batch)`][mlenergy.data.modeling.logistic.LogisticModel.eval] evaluates the curve, and [`LogisticModel.deriv_wrt_x(x)`][mlenergy.data.modeling.logistic.LogisticModel.deriv_wrt_x] computes gradients for the OFO controller ## ITL Mixture Model Historical ITL measurements exhibit heavy-tailed behavior. The generation step captures this using a weighted mixture of two lognormal distributions per batch size. -OpenG2G uses [`ITLMixtureModel`][mlenergy_data.modeling.ITLMixtureModel] from [`mlenergy-data`](https://ml.energy/data) at both stages: +OpenG2G uses [`ITLMixtureModel`][mlenergy.data.modeling.ITLMixtureModel] from [`mlenergy-data`](https://ml.energy/data) at both stages: -- **Generation**: [`ITLMixtureModel.fit(samples)`][mlenergy_data.modeling.latency.ITLMixtureModel.fit] fits the mixture to raw ITL samples -- **Runtime**: [`ITLMixtureModel.sample_avg(n_replicas, rng)`][mlenergy_data.modeling.latency.ITLMixtureModel.sample_avg] draws average latency across replicas +- **Generation**: [`ITLMixtureModel.fit(samples)`][mlenergy.data.modeling.latency.ITLMixtureModel.fit] fits the mixture to raw ITL samples +- **Runtime**: [`ITLMixtureModel.sample_avg(n_replicas, rng)`][mlenergy.data.modeling.latency.ITLMixtureModel.sample_avg] draws average latency across replicas ## Training Trace Generation @@ -164,5 +164,5 @@ To use the dataset: At simulation time, the generated artifacts are consumed by two components: -- **[`OfflineDatacenter`][openg2g.datacenter.offline.OfflineDatacenter]**: Uses [`InferenceData`][openg2g.datacenter.workloads.inference.InferenceData] to replay periodic per-GPU power templates. Latency fits ([`ITLMixtureModel`][mlenergy_data.modeling.ITLMixtureModel]) are sampled at each control interval. +- **[`OfflineDatacenter`][openg2g.datacenter.offline.OfflineDatacenter]**: Uses [`InferenceData`][openg2g.datacenter.workloads.inference.InferenceData] to replay periodic per-GPU power templates. Latency fits ([`ITLMixtureModel`][mlenergy.data.modeling.ITLMixtureModel]) are sampled at each control interval. - **[`OFOBatchSizeController`][openg2g.controller.ofo.OFOBatchSizeController]**: Uses [`LogisticModelStore`][openg2g.controller.ofo.LogisticModelStore] for logistic curve evaluation. Calls `eval()` and `deriv_wrt_x()` at each control step to compute gradients. diff --git a/examples/model_insights/plots.py b/examples/model_insights/plots.py index 7fcf200..a13d390 100644 --- a/examples/model_insights/plots.py +++ b/examples/model_insights/plots.py @@ -41,10 +41,36 @@ COLORS = { "baseline": "#9A9A9A", - "ofo": "#4C72B0", - "h100": "#4C72B0", - "b200": "#C44E52", - "hardware": "#C44E52", + "ofo": "#1F77B4", # tab10 blue + "h100": "#1F77B4", # tab10 blue + "b200": "#D62728", # tab10 red + "hardware": "#D62728", # tab10 red +} + +_TAB10 = plt.get_cmap("tab10").colors + +# Stable per-model colors used wherever a model is identified by color +# (model-size figure, hardware figure, etc.). The hardware figure uses +# blue for Qwen 3 32B and red for Qwen 3 8B; that pair is canonical and +# the rest of the palette is filled in around it. +MODEL_COLORS: dict[str, str] = { + "Qwen 3 32B": _TAB10[0], # blue + "Qwen 3 30B A3B": _TAB10[1], # orange + "GPT-OSS 120B": _TAB10[2], # green + "Qwen 3 8B": _TAB10[3], # red + "Qwen 3 235B A22B": _TAB10[4], # purple + "Qwen 3 235B A22B Thinking": _TAB10[4], # same as 235B A22B + "Llama 3.1 70B": _TAB10[5], # brown + "Llama 3.1 405B": _TAB10[6], # pink +} + +# Distinct color pairs for the two parallelism panels so (a) and (b) read as +# separate experiments rather than the same one twice. The first pair reuses +# the default h100/b200 hues (blue, red); the second pair picks two of the +# remaining tab10 colors used by the model-size figure that are not in (a). +PARALLELISM_PAIR_COLORS = { + "gpt-oss-120b": (COLORS["h100"], COLORS["b200"]), + "qwen-235b-a22b-thinking": (_TAB10[1], _TAB10[2]), } DISPLAY_LABELS = { @@ -356,15 +382,21 @@ def plot_model_size( agg = _aggregate(df) all_variants = df["variant"].drop_duplicates().tolist() - # Sort variants by power-swing range (least flexible → most flexible). + # Compute power-swing range per variant. def _swing(variant: str) -> float: row = df[(df["variant"] == variant) & (df["mode"] == OFO_MODE)].iloc[0].to_dict() _, p_mw, _ = _pareto_from_row(row, logistic_models) return float(p_mw.max() - p_mw.min()) if len(p_mw) else 0.0 - variants = sorted(all_variants, key=_swing) - palette = plt.get_cmap("tab10").colors - color_map = {v: palette[i % len(palette)] for i, v in enumerate(variants)} + # Color is fixed per model identity (MODEL_COLORS), so the same model + # appears in the same color across model-size, hardware, and other + # figures. + color_map = {v: MODEL_COLORS[_pretty(v)] for v in all_variants} + + # Plot order: widest → narrowest, so the reference model with the largest + # feasible power range anchors the leftmost position and other models + # trail as comparisons. + variants = sorted(all_variants, key=_swing, reverse=True) def _val(v, mode, col): row = agg[(agg.variant == v) & (agg["mode"] == mode)] @@ -420,7 +452,8 @@ def _v(v, mode, col, sub=sub): r = sub[(sub["variant"] == v) & (sub["mode"] == mode)] return float(r[col].iloc[0]) if not r.empty else math.nan - color_map = {labels[i]: (COLORS["h100"] if i == 0 else COLORS["b200"]) for i in range(len(variants))} + pair_palette = PARALLELISM_PAIR_COLORS.get(pair, (COLORS["h100"], COLORS["b200"])) + color_map = {labels[i]: pair_palette[i] for i in range(len(variants))} # (a) integral violation — colored bars, hatch = baseline vs OFO fig, ax = _make_panel() @@ -484,9 +517,11 @@ def _v(pair_, hw, mode, col): r = agg[(agg["variant"] == v) & (agg["mode"] == mode)] return float(r[col].iloc[0]) if not r.empty else math.nan - # Two-color encoding (blue for the first model pair, red for the - # second), matching the parallelism and precision figures. - pair_colors = {p: (COLORS["h100"] if i == 0 else COLORS["b200"]) for i, p in enumerate(pairs)} + # Color each pair by its canonical model color (MODEL_COLORS), so + # e.g. Qwen 3 8B is the same red here as in the model-size figure. + pair_colors = { + p: MODEL_COLORS[_pretty(next(v for (pp, _), v in variants_by_pair_hw.items() if pp == p))] for p in pairs + } # (a) — integral violation. X-axis = hardware. Within each hardware # group, 4 bars: 2 models × (uncoord, coord). Color = model, hatch = @@ -654,7 +689,10 @@ def _v(v, mode, col): def _display_prec(label: str) -> str: return "BF16" if label == "bf16" else label - handles = [_combo_handle(color_map[lbl], _display_prec(lbl)) for lbl in sorted(set(labels))] + # Order: BF16 first (blue), then FP8 (red), matching the blue-then-red + # order used in the model-size, hardware, and parallelism legends. + ordered_labels = [lbl for lbl in ("bf16", "FP8") if lbl in set(labels)] + handles = [_combo_handle(color_map[lbl], _display_prec(lbl)) for lbl in ordered_labels] _save_legend(handles, out / f"precision_legend{suffix}", ncol=len(set(labels)), width=1.8) diff --git a/openg2g/controller/ofo.py b/openg2g/controller/ofo.py index 38304c7..871713d 100644 --- a/openg2g/controller/ofo.py +++ b/openg2g/controller/ofo.py @@ -14,8 +14,8 @@ from typing import Any import numpy as np -from mlenergy_data.modeling import LogisticModel -from mlenergy_data.records import LLMRuns +from mlenergy.data.modeling import LogisticModel +from mlenergy.data.records import LLMRuns from pydantic import BaseModel, ConfigDict from openg2g.clock import SimulationClock diff --git a/openg2g/datacenter/workloads/inference.py b/openg2g/datacenter/workloads/inference.py index b51ee73..4bee278 100644 --- a/openg2g/datacenter/workloads/inference.py +++ b/openg2g/datacenter/workloads/inference.py @@ -14,8 +14,8 @@ import numpy as np import pandas as pd -from mlenergy_data.modeling import ITLMixtureModel -from mlenergy_data.records import LLMRuns +from mlenergy.data.modeling import ITLMixtureModel +from mlenergy.data.records import LLMRuns from pydantic import BaseModel, ConfigDict import openg2g diff --git a/pyproject.toml b/pyproject.toml index 331990f..ab20202 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "pydantic>=2.0", "aiohttp", "zeus>=0.15.0", - "mlenergy-data", + "mlenergy-data>=0.4.0", ] [project.urls] diff --git a/tests/test_logistic.py b/tests/test_logistic.py index f1251ab..f032cb3 100644 --- a/tests/test_logistic.py +++ b/tests/test_logistic.py @@ -4,7 +4,7 @@ import math -from mlenergy_data.modeling import LogisticModel +from mlenergy.data.modeling import LogisticModel def test_eval_at_midpoint(): diff --git a/tests/test_offline_dc.py b/tests/test_offline_dc.py index f1b1f4c..b8b5160 100644 --- a/tests/test_offline_dc.py +++ b/tests/test_offline_dc.py @@ -6,7 +6,7 @@ import numpy as np import pytest -from mlenergy_data.modeling import ITLMixtureModel +from mlenergy.data.modeling import ITLMixtureModel from openg2g.clock import SimulationClock from openg2g.coordinator import SimulationLog diff --git a/tests/test_ofo_internals.py b/tests/test_ofo_internals.py index 59045de..d24b773 100644 --- a/tests/test_ofo_internals.py +++ b/tests/test_ofo_internals.py @@ -9,7 +9,7 @@ import numpy as np import pytest -from mlenergy_data.modeling import LogisticModel +from mlenergy.data.modeling import LogisticModel from openg2g.controller.ofo import ( OFOConfig, diff --git a/tests/test_ofo_observed_latency.py b/tests/test_ofo_observed_latency.py index 2178dfa..783b4db 100644 --- a/tests/test_ofo_observed_latency.py +++ b/tests/test_ofo_observed_latency.py @@ -3,7 +3,7 @@ from fractions import Fraction import numpy as np -from mlenergy_data.modeling import LogisticModel +from mlenergy.data.modeling import LogisticModel from openg2g.clock import SimulationClock from openg2g.common import ThreePhase diff --git a/uv.lock b/uv.lock index f63a01f..1df5ddc 100644 --- a/uv.lock +++ b/uv.lock @@ -1680,7 +1680,7 @@ wheels = [ [[package]] name = "mlenergy-data" -version = "0.3.2" +version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -1691,9 +1691,9 @@ dependencies = [ { name = "pyarrow" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/55/131b85ce36e2c9ce91612aea93b5912a4730eeac5f5f157647b9040b9f4a/mlenergy_data-0.3.2.tar.gz", hash = "sha256:613f89c508c8a2c328962f015a3c09db8f8fd52f78356e606b4dc5ff75102365", size = 35503, upload-time = "2026-03-23T21:38:21.542Z" } +sdist = { url = "https://files.pythonhosted.org/packages/37/d6/1958e8445023dfdc748018af1bf349d4f8a3c09d1c61316accc8fdacc12d/mlenergy_data-0.4.0.tar.gz", hash = "sha256:94119d4884d19348af340ecb8d36e75270e37b72ea1d606e9976aa6a3293ce4c", size = 37477, upload-time = "2026-05-07T02:10:30.823Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/24/3e/52e2de434bb6603d4159f945267def38c7b6e5898d4b887226cd35bf7e60/mlenergy_data-0.3.2-py3-none-any.whl", hash = "sha256:56d71c9e6504b8a59eda3a282ab79f68db4b19a374b3b09872b70d3778c84ebe", size = 27503, upload-time = "2026-03-23T21:38:20.677Z" }, + { url = "https://files.pythonhosted.org/packages/da/d8/4396dcb42dbfc9018f4c397df9e3304c7cbbc62f67afd6f6fe956cc1f889/mlenergy_data-0.4.0-py3-none-any.whl", hash = "sha256:5c793a9b625091ffcd2c6a071458e3bdf76d2461913868d2d8a48bb45907a71f", size = 28378, upload-time = "2026-05-07T02:10:29.593Z" }, ] [[package]] @@ -2299,7 +2299,7 @@ test = [ requires-dist = [ { name = "aiohttp" }, { name = "gymnasium", marker = "extra == 'rl'" }, - { name = "mlenergy-data" }, + { name = "mlenergy-data", specifier = ">=0.4.0" }, { name = "numpy" }, { name = "opendssdirect-py", marker = "extra == 'opendss'" }, { name = "pandas" },