626 changes: 626 additions & 0 deletions 0001-feature-add-per_constraints-unit_test-and-docs.patch

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions docs/getting-started/benchmark.md
@@ -62,6 +62,40 @@ GuideLLM supports several benchmark profiles and strategies:
- `poisson`: Sends requests following a Poisson distribution
- `sweep`: Automatically determines optimal performance points (default)

### Per-Strategy Constraints in `sweep`

Sweep benchmarks execute a fixed sequence of strategies (synchronous → throughput → async rates), so you can set limits for each stage with `per_constraints`. Two entry points support it:

#### CLI Example

```bash
guidellm benchmark run \
--profile sweep \
--rate 5 \
--target "http://localhost:8000" \
--data "prompt_tokens=256,output_tokens=128" \
--per-constraints '{"max_seconds": [5, 10, 15, 15, 20], "max_requests": [100, 200, 200, 400, 400]}'
```

Values are applied in order across the sweep strategies, as the mapping below illustrates. Providing `--per-constraints` with any non-sweep profile raises a validation error to prevent accidental misuse.
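
Assuming `--rate 5` yields a five-stage sweep (synchronous, throughput, then three interpolated async rates), the example above maps as:

- Stage 1 (synchronous): `max_seconds=5`, `max_requests=100`
- Stage 2 (throughput): `max_seconds=10`, `max_requests=200`
- Stages 3–5 (interpolated async rates): `max_seconds=15/15/20`, `max_requests=200/400/400`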

#### Scenario Example

```json
{
"target": "http://localhost:8000",
"data": ["prompt_tokens=256,output_tokens=128"],
"profile": "sweep",
"rate": 5,
"per_constraints": {
"max_seconds": [5,10,15,15,20],
"max_requests": [100, 200, 200, 400, 400]
}
}
```

Running `guidellm benchmark run --scenario my_sweep.json` automatically applies these per-strategy settings without additional CLI flags.
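
The scenario can also be loaded programmatically. A minimal sketch based on this PR's unit tests (the file name is an assumption):

```python
from pathlib import Path

from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs

# per_constraints is checked at validation time, so null entries in the
# per-strategy lists are rejected as soon as the args are created.
args = BenchmarkGenerativeTextArgs.create(scenario=Path("my_sweep.json"))
assert args.per_constraints["max_seconds"] == [5, 10, 15, 15, 20]
```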

### Data Options

For synthetic data, key options include:
7 changes: 7 additions & 0 deletions src/guidellm/__main__.py
@@ -404,6 +404,13 @@ def benchmark():
flag_value='{"enabled": true}',
help="Enable over-saturation detection with default settings.",
)
@click.option(
"--per-constraints",
callback=cli_tools.parse_json,
default=None,
help=(
"Per-strategy constraints for sweep mode. "
'JSON format: {"constraint_name": [value1, value2, ...]}'
),
)
def run(**kwargs): # noqa: C901
# Only set CLI args that differ from click defaults
kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
23 changes: 23 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
@@ -325,6 +325,8 @@ async def resolve_profile(
max_global_error_rate: float | None,
over_saturation: dict[str, Any] | None = None,
console: Console | None = None,
per_constraints: dict[str, Any] | None = None,
**kwargs: Any,
) -> Profile:
"""
Resolve and configure a benchmark profile with rate and constraint settings.
@@ -346,6 +348,7 @@
:param max_global_error_rate: Maximum global error rate threshold before stopping
:param over_saturation: Over-saturation detection configuration (dict)
:param console: Console instance for progress reporting, or None
:param per_constraints: Per-strategy constraints (sweep profile only)
:return: Configured Profile instance ready for benchmarking
:raises ValueError: If constraints are provided with a pre-configured Profile
"""
@@ -367,18 +370,37 @@
constraints[key] = val

if not isinstance(profile, Profile):
supported_per_constraints = {"sweep"}
profile_kwargs = dict(kwargs)
if per_constraints:
profile_type = str(profile)
if profile_type not in supported_per_constraints:
raise ValueError(
"Per-strategy constraints are only supported with the 'sweep' profile."
)
# Validate no nulls in per_constraints lists
for key, val_list in per_constraints.items():
if isinstance(val_list, list) and any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)
profile_kwargs["per_constraints"] = per_constraints

profile = Profile.create(
rate_type=profile,
rate=rate,
random_seed=random_seed,
rampup_duration=rampup,
constraints={**constraints},
**profile_kwargs,
)
elif constraints:
raise ValueError(
"Constraints must be empty when providing a Profile instance. "
f"Provided constraints: {constraints} ; provided profile: {profile}"
)
elif per_constraints:
raise ValueError(
"Per-strategy constraints cannot be applied when providing a Profile instance."
)
elif rampup > 0.0:
raise ValueError(
"Ramp-up duration must not be set when providing a Profile instance. "
@@ -505,6 +527,7 @@ async def benchmark_generative_text(
max_global_error_rate=args.max_global_error_rate,
over_saturation=args.over_saturation,
console=console,
per_constraints=args.per_constraints,
)
output_formats = await resolve_output_formats(
outputs=args.outputs, output_dir=args.output_dir, console=console
69 changes: 69 additions & 0 deletions src/guidellm/benchmark/profiles.py
@@ -609,6 +609,34 @@ class SweepProfile(Profile):
default_factory=list,
description="Interpolated rates between synchronous and throughput",
)
per_constraints: dict[str, list[Any]] | None = Field(
default=None,
description="Per-strategy constraint values, applied in order across the sweep's strategies (sweep profile only)",
)

@field_validator("per_constraints", mode="before")
@classmethod
def validate_per_constraints(cls, value: Any) -> dict[str, list[Any]] | None:
"""
Validate that per_constraints doesn't contain null values in the lists.

:param value: Input value for per_constraints field
:return: Validated per_constraints dictionary
"""
if value is None:
return None

if not isinstance(value, dict):
return value

for key, val_list in value.items():
if not isinstance(val_list, list):
continue

if any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)

return value

@classmethod
def resolve_args(
@@ -632,7 +660,48 @@
kwargs["random_seed"] = random_seed
if rate_type in ["constant", "poisson"]:
kwargs["strategy_type"] = rate_type
if "per_constraints" in kwargs:
# Already in the correct format, keep it
pass
elif "constraints" in kwargs:
# Backward compatibility: split into per-strategy and shared constraints
constraints = kwargs["constraints"]
if isinstance(constraints, dict):
shared_constraints = {}
per_constraints = {}
for key, val in constraints.items():
if isinstance(val, list):
per_constraints[key] = val
else:
shared_constraints[key] = val
kwargs["constraints"] = shared_constraints or None
kwargs["per_constraints"] = per_constraints or None
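# Example (sketch): constraints={"max_seconds": [5, 10], "max_requests": 100}
# splits into per_constraints={"max_seconds": [5, 10]} and
# constraints={"max_requests": 100} for backward compatibility.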
return kwargs

def next_strategy_constraints(
self,
next_strategy: SchedulingStrategy | None,
prev_strategy: SchedulingStrategy | None,
prev_benchmark: Benchmark | None,
) -> dict[str, Constraint] | None:
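"""
Merge shared constraints with the per-strategy values for the next stage.

Sketch of the intended behavior (mirrors this PR's unit tests): with
constraints={"max_requests": 100} and per_constraints={"max_seconds": [5, 10, 15]},
the first strategy resolves {"max_seconds": 5, "max_requests": 100},
the second {"max_seconds": 10, "max_requests": 100}, and so on.
"""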
if not next_strategy:
return None

current_index = len(self.completed_strategies)
final_constraints: dict[str, Any] = dict(self.constraints or {})

if self.per_constraints and 0 <= current_index < self.sweep_size:
for key, val in self.per_constraints.items():
# Guard against per-strategy lists shorter than the sweep size
if current_index >= len(val):
continue
constraint_val = val[current_index]
if constraint_val is None:
# Defensive: the validators reject nulls, but a null here
# removes the shared constraint for this stage
final_constraints.pop(key, None)
else:
final_constraints[key] = constraint_val

return (
ConstraintsInitializerFactory.resolve(final_constraints)
if final_constraints
else None
)

@property
def strategy_types(self) -> list[str]:
25 changes: 25 additions & 0 deletions src/guidellm/benchmark/schemas/generative/entrypoints.py
@@ -291,6 +291,31 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
"moe_threshold, etc.)."
),
)
per_constraints: dict[str, Any] | None = Field(
default=None,
description="Per-strategy constraints applied in order across the sweep profile's strategies",
)

@field_validator("per_constraints", mode="before")
@classmethod
def validate_per_constraints(cls, value: Any) -> dict[str, Any] | None:
"""
Validate that per_constraints doesn't contain null values in the lists.

:param value: Input value for per_constraints field
:return: Validated per_constraints dictionary
"""
if value is None:
return None

if not isinstance(value, dict):
return value

for key, val_list in value.items():
if isinstance(val_list, list) and any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)

return value

@field_validator("data", "data_args", "rate", mode="wrap")
@classmethod
157 changes: 157 additions & 0 deletions tests/unit/benchmark/test_per_constraints.py
@@ -0,0 +1,157 @@
import json
from pathlib import Path

import pytest

from guidellm.benchmark.entrypoints import resolve_profile
from guidellm.benchmark.profiles import SweepProfile, SynchronousProfile
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs
from guidellm.scheduler import (
AsyncConstantStrategy,
ConstraintsInitializerFactory,
SynchronousStrategy,
ThroughputStrategy,
)

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_allows_per_constraints_for_sweep():
profile = await resolve_profile(
profile="sweep",
rate=[5],
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1, 2, 3, 4, 5]},
)

assert isinstance(profile, SweepProfile)
assert profile.per_constraints == {"max_seconds": [1, 2, 3, 4, 5]}

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_per_constraints_for_non_sweep():
with pytest.raises(
ValueError,
match="Per-strategy constraints are only supported with the 'sweep' profile.",
):
await resolve_profile(
profile="synchronous",
rate=None,
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1]},
)

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_per_constraints_for_instances():
synchronous_profile = SynchronousProfile()

with pytest.raises(
ValueError, match="Per-strategy constraints cannot be applied"
):
await resolve_profile(
profile=synchronous_profile,
rate=None,
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1]},
)

@pytest.mark.smoke
def test_sweep_profile_applies_per_constraints_sequence(monkeypatch):
captured: list[dict[str, int]] = []

def fake_resolve(value):
captured.append(value)
return value

monkeypatch.setattr(
ConstraintsInitializerFactory, "resolve", staticmethod(fake_resolve)
)

profile = SweepProfile(
sweep_size=3,
per_constraints={"max_seconds": [5, 10, 15]},
constraints={"max_seconds": 30, "max_requests": 100},
)

sync = SynchronousStrategy()
profile.next_strategy_constraints(sync, None, None)
assert captured[-1]["max_seconds"] == 5
assert captured[-1]["max_requests"] == 100

profile.completed_strategies.append(sync)
throughput = ThroughputStrategy(max_concurrency=1, rampup_duration=0.0)
profile.next_strategy_constraints(throughput, sync, None)
assert captured[-1]["max_seconds"] == 10
assert captured[-1]["max_requests"] == 100

profile.completed_strategies.append(throughput)
async_strategy = AsyncConstantStrategy(rate=1.0, max_concurrency=None)
profile.next_strategy_constraints(async_strategy, throughput, None)
assert captured[-1]["max_seconds"] == 15
assert captured[-1]["max_requests"] == 100

@pytest.mark.smoke
def test_benchmark_args_accept_per_constraints_from_scenario(tmp_path: Path):
scenario_path = tmp_path / "scenario.json"
scenario_content = {
"target": "http://localhost:9000",
"data": ["prompt_tokens=8,output_tokens=8"],
"profile": "sweep",
"rate": 5,
"per_constraints": {
"max_seconds": [5, 10, 15, 15, 20],
"max_requests": [100, 200, 200, 400, 400],
},
}
scenario_path.write_text(json.dumps(scenario_content))

args = BenchmarkGenerativeTextArgs.create(scenario=scenario_path)

assert args.per_constraints == {
"max_seconds": [5, 10, 15, 15, 20],
"max_requests": [100, 200, 200, 400, 400],
}


@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_null_per_constraints():
with pytest.raises(
ValueError,
match=(
"Per-strategy constraints for 'max_seconds' contain null values, "
"which are not allowed."
),
):
await resolve_profile(
profile="sweep",
rate=[5],
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [5, None, 15, 20, 25, 30]},
)


@pytest.mark.smoke
def test_sweep_profile_rejects_null_per_constraints():
with pytest.raises(
ValueError,
match=(
"Per-strategy constraints for 'max_requests' contain null values, "
"which are not allowed."
),
):
SweepProfile(
sweep_size=5,
per_constraints={"max_requests": [100, None, 200, 300, 400]},
)