From 9a99e6aba5eb2681b75a884b06d435f4d8e64901 Mon Sep 17 00:00:00 2001
From: AiKiAi-stack
Date: Sun, 7 Dec 2025 22:11:43 -0800
Subject: [PATCH] Add `per-constraints` feature for `Sweep`, with docs and tests

---
 docs/getting-started/benchmark.md             |  34 +
 src/guidellm/__main__.py                      |   7 +
 src/guidellm/benchmark/entrypoints.py         |  23 +
 src/guidellm/benchmark/profiles.py            |  69 ++
 .../schemas/generative/entrypoints.py         |  25 +
 tests/unit/benchmark/test_per_constraints.py  | 157 +++++
 tests/unit/benchmark/test_profiles.py         | 126 ++++
 tests/unit/test_main.py                       |  34 +-
 8 files changed, 474 insertions(+), 1 deletion(-)
 create mode 100644 tests/unit/benchmark/test_per_constraints.py
 create mode 100644 tests/unit/benchmark/test_profiles.py

diff --git a/docs/getting-started/benchmark.md b/docs/getting-started/benchmark.md
index 24a32042f..afbccb93b 100644
--- a/docs/getting-started/benchmark.md
+++ b/docs/getting-started/benchmark.md
@@ -62,6 +62,40 @@ GuideLLM supports several benchmark profiles and strategies:
 - `poisson`: Sends requests following a Poisson distribution
 - `sweep`: Automatically determines optimal performance points (default)
 
+### Per-Strategy Constraints in `sweep`
+
+Sweep benchmarks execute a fixed sequence of strategies (synchronous → throughput → async rates), so you can set limits for each stage with `per_constraints`. Two entry points support it:
+
+#### CLI Example
+
+```bash
+guidellm benchmark run \
+  --profile sweep \
+  --rate 5 \
+  --target "http://localhost:8000" \
+  --data "prompt_tokens=256,output_tokens=128" \
+  --per-constraints '{"max_seconds":[5,10,15,15,20], "max_requests":[100, 200, 200, 400, 400]}'
+```
+
+Values are applied in order across the sweep strategies. Providing `--per-constraints` with any non-sweep profile raises a validation error to prevent accidental misuse.
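+
+With `--rate 5`, the sweep runs five strategies, and the listed values line up with them in order:
+
+| Sweep stage | Strategy | `max_seconds` | `max_requests` |
+| --- | --- | --- | --- |
+| 1 | synchronous | 5 | 100 |
+| 2 | throughput | 10 | 200 |
+| 3 | async rate 1 | 15 | 200 |
+| 4 | async rate 2 | 15 | 400 |
+| 5 | async rate 3 | 20 | 400 |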
+ +#### Scenario Example + +```json +{ + "target": "http://localhost:8000", + "data": ["prompt_tokens=256,output_tokens=128"], + "profile": "sweep", + "rate": 5, + "per_constraints": { + "max_seconds": [5,10,15,15,20], + "max_requests": [100, 200, 200, 400, 400] + } +} +``` + +Running `guidellm benchmark run --scenario my_sweep.json` automatically applies these per-strategy settings without additional CLI flags. + ### Data Options For synthetic data, some key options include, among others: diff --git a/src/guidellm/__main__.py b/src/guidellm/__main__.py index d0fc89a19..253b8d693 100644 --- a/src/guidellm/__main__.py +++ b/src/guidellm/__main__.py @@ -404,6 +404,13 @@ def benchmark(): flag_value='{"enabled": true}', help="Enable over-saturation detection with default settings.", ) +@click.option( + "--per-constraints", + callback=cli_tools.parse_json, + default=None, + help="Per-strategy constraints for sweep mode. Format: {'constraint_name': [value1, value2, ...]}", +) + def run(**kwargs): # noqa: C901 # Only set CLI args that differ from click defaults kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs) diff --git a/src/guidellm/benchmark/entrypoints.py b/src/guidellm/benchmark/entrypoints.py index 75c8c787b..a96a82c46 100644 --- a/src/guidellm/benchmark/entrypoints.py +++ b/src/guidellm/benchmark/entrypoints.py @@ -325,6 +325,8 @@ async def resolve_profile( max_global_error_rate: float | None, over_saturation: dict[str, Any] | None = None, console: Console | None = None, + per_constraints: dict[str, Any] | None = None, + **kwargs: Any, ) -> Profile: """ Resolve and configure a benchmark profile with rate and constraint settings. @@ -346,6 +348,7 @@ async def resolve_profile( :param max_global_error_rate: Maximum global error rate threshold before stopping :param over_saturation: Over-saturation detection configuration (dict) :param console: Console instance for progress reporting, or None + :param per_constraints: Per-strategy constraints (sweep profile only) :return: Configured Profile instance ready for benchmarking :raises ValueError: If constraints are provided with a pre-configured Profile """ @@ -367,18 +370,37 @@ async def resolve_profile( constraints[key] = val if not isinstance(profile, Profile): + supported_per_constraints = {"sweep"} + profile_kwargs = dict(kwargs) + if per_constraints: + profile_type = profile if isinstance(profile, str) else str(profile) + if profile_type not in supported_per_constraints: + raise ValueError( + "Per-strategy constraints are only supported with the 'sweep' profile." + ) + # Validate no nulls in per_constraints lists + for key, val_list in per_constraints.items(): + if isinstance(val_list, list) and any(item is None for item in val_list): + raise ValueError(f"Per-strategy constraints for '{key}' contain null values, which are not allowed.") + profile_kwargs["per_constraints"] = per_constraints + profile = Profile.create( rate_type=profile, rate=rate, random_seed=random_seed, rampup_duration=rampup, constraints={**constraints}, + **profile_kwargs, ) elif constraints: raise ValueError( "Constraints must be empty when providing a Profile instance. " f"Provided constraints: {constraints} ; provided profile: {profile}" ) + elif per_constraints: + raise ValueError( + "Per-strategy constraints cannot be applied when providing a Profile instance." + ) elif rampup > 0.0: raise ValueError( "Ramp-up duration must not be set when providing a Profile instance. 
" @@ -505,6 +527,7 @@ async def benchmark_generative_text( max_global_error_rate=args.max_global_error_rate, over_saturation=args.over_saturation, console=console, + per_constraints=args.per_constraints, ) output_formats = await resolve_output_formats( outputs=args.outputs, output_dir=args.output_dir, console=console diff --git a/src/guidellm/benchmark/profiles.py b/src/guidellm/benchmark/profiles.py index 848722809..e3695cb46 100644 --- a/src/guidellm/benchmark/profiles.py +++ b/src/guidellm/benchmark/profiles.py @@ -609,6 +609,34 @@ class SweepProfile(Profile): default_factory=list, description="Interpolated rates between synchronous and throughput", ) + per_constraints: dict[str, list[Any]] | None = Field( + default=None, + description="Per-strategy constraints only valid in sweep mode", + ) + + @field_validator("per_constraints", mode="before") + @classmethod + def validate_per_constraints(cls, value: Any) -> dict[str, list[Any]] | None: + """ + Validate that per_constraints doesn't contain null values in the lists. + + :param value: Input value for per_constraints field + :return: Validated per_constraints dictionary + """ + if value is None: + return None + + if not isinstance(value, dict): + return value + + for key, val_list in value.items(): + if not isinstance(val_list, list): + continue + + if any(item is None for item in val_list): + raise ValueError(f"Per-strategy constraints for '{key}' contain null values, which are not allowed.") + + return value @classmethod def resolve_args( @@ -632,7 +660,48 @@ def resolve_args( kwargs["random_seed"] = random_seed if rate_type in ["constant", "poisson"]: kwargs["strategy_type"] = rate_type + if "per_constraints" in kwargs: + # Already in the correct format, keep it + pass + elif "constraints" in kwargs: + # Backward compatibility: split into per-strategy and shared constraints + constraints = kwargs["constraints"] + if isinstance(constraints, dict): + shared_constraints = {} + per_constraints = {} + for key, val in constraints.items(): + if isinstance(val, list): + per_constraints[key] = val + else: + shared_constraints[key] = val + kwargs["constraints"] = shared_constraints or None + kwargs["per_constraints"] = per_constraints or None return kwargs + def next_strategy_constraints( + self, + next_strategy: SchedulingStrategy | None, + prev_strategy: SchedulingStrategy | None, + prev_benchmark: Benchmark | None, + ) -> dict[str, Constraint] | None: + if not next_strategy: + return None + + current_index = len(self.completed_strategies) + final_constraints: dict[str, Any] = dict(self.constraints or {}) + + if self.per_constraints: + for key, val in self.per_constraints.items(): + if 0 <= current_index < self.sweep_size: + constraint_val = val[current_index] + if constraint_val is None: + final_constraints.pop(key, None) + else: + final_constraints[key] = constraint_val + return ( + ConstraintsInitializerFactory.resolve(final_constraints) + if final_constraints + else None + ) @property def strategy_types(self) -> list[str]: diff --git a/src/guidellm/benchmark/schemas/generative/entrypoints.py b/src/guidellm/benchmark/schemas/generative/entrypoints.py index fff2bec37..e7e674eb2 100644 --- a/src/guidellm/benchmark/schemas/generative/entrypoints.py +++ b/src/guidellm/benchmark/schemas/generative/entrypoints.py @@ -291,6 +291,31 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any: "moe_threshold, etc.)." 
), ) + per_constraints: dict[str, Any] | None = Field( + default=None, + description="Specified constraints to apply to the sweep profile", + ) + + @field_validator("per_constraints", mode="before") + @classmethod + def validate_per_constraints(cls, value: Any) -> dict[str, Any] | None: + """ + Validate that per_constraints doesn't contain null values in the lists. + + :param value: Input value for per_constraints field + :return: Validated per_constraints dictionary + """ + if value is None: + return None + + if not isinstance(value, dict): + return value + + for key, val_list in value.items(): + if isinstance(val_list, list) and any(item is None for item in val_list): + raise ValueError(f"Per-strategy constraints for '{key}' contain null values, which are not allowed.") + + return value @field_validator("data", "data_args", "rate", mode="wrap") @classmethod diff --git a/tests/unit/benchmark/test_per_constraints.py b/tests/unit/benchmark/test_per_constraints.py new file mode 100644 index 000000000..2330679a1 --- /dev/null +++ b/tests/unit/benchmark/test_per_constraints.py @@ -0,0 +1,157 @@ +import json +from pathlib import Path + +import pytest + +from guidellm.benchmark.entrypoints import resolve_profile +from guidellm.benchmark.profiles import SweepProfile, SynchronousProfile +from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs +from guidellm.scheduler import ( + AsyncConstantStrategy, + ConstraintsInitializerFactory, + SynchronousStrategy, + ThroughputStrategy, +) + +@pytest.mark.smoke +@pytest.mark.asyncio +async def test_resolve_profile_allows_per_constraints_for_sweep(): + profile = await resolve_profile( + profile="sweep", + rate=[5], + random_seed=123, + rampup=0.0, + constraints={}, + max_seconds=None, + max_requests=None, + max_errors=None, + max_error_rate=None, + max_global_error_rate=None, + console=None, + per_constraints={"max_seconds": [1, 2, 3, 4, 5]}, + ) + + assert isinstance(profile, SweepProfile) + assert profile.per_constraints == {"max_seconds": [1, 2, 3, 4, 5]} + +@pytest.mark.smoke +@pytest.mark.asyncio +async def test_resolve_profile_rejects_per_constraints_for_non_sweep(): + with pytest.raises(ValueError, match="Per-strategy constraints are only supported with the 'sweep' profile."): + await resolve_profile( + profile="synchronous", + rate=None, + random_seed=123, + rampup=0.0, + constraints={}, + max_seconds=None, + max_requests=None, + max_errors=None, + max_error_rate=None, + max_global_error_rate=None, + console=None, + per_constraints={"max_seconds": [1]}, + ) + +@pytest.mark.smoke +@pytest.mark.asyncio +async def test_resolve_profile_rejects_per_constraints_for_instances(): + synchronous_profile = SynchronousProfile() + + with pytest.raises( + ValueError, match="Per-strategy constraints cannot be applied" + ): + await resolve_profile( + profile=synchronous_profile, + rate=None, + random_seed=123, + rampup=0.0, + constraints={}, + max_seconds=None, + max_requests=None, + max_errors=None, + max_error_rate=None, + max_global_error_rate=None, + console=None, + per_constraints={"max_seconds": [1]}, + ) + +@pytest.mark.smoke +def test_sweep_profile_applies_per_constraints_sequence(monkeypatch): + captured: list[dict[str, int]] = [] + + def fake_resolve(value): + captured.append(value) + return value + + monkeypatch.setattr( + ConstraintsInitializerFactory, "resolve", staticmethod(fake_resolve) + ) + + profile = SweepProfile( + sweep_size=3, + per_constraints={"max_seconds": [5, 10, 15]}, + constraints={"max_seconds": 30, "max_requests": 100}, + ) 
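+
+    # Expected per-stage resolution: the per-strategy max_seconds values
+    # (5, 10, 15) override the shared max_seconds=30, while the shared
+    # max_requests=100 carries through to every stage.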
+ + sync = SynchronousStrategy() + profile.next_strategy_constraints(sync, None, None) + assert captured[-1]["max_seconds"] == 5 + assert captured[-1]["max_requests"] == 100 + + profile.completed_strategies.append(sync) + throughput = ThroughputStrategy(max_concurrency=1, rampup_duration=0.0) + profile.next_strategy_constraints(throughput, sync, None) + assert captured[-1]["max_seconds"] == 10 + assert captured[-1]["max_requests"] == 100 + + profile.completed_strategies.append(throughput) + async_strategy = AsyncConstantStrategy(rate=1.0, max_concurrency=None) + profile.next_strategy_constraints(async_strategy, throughput, None) + assert captured[-1]["max_seconds"] == 15 + assert captured[-1]["max_requests"] == 100 + +@pytest.mark.smoke +def test_benchmark_args_accept_per_constraints_from_scenario(tmp_path: Path): + scenario_path = tmp_path / "scenario.json" + scenario_content = { + "target": "http://localhost:9000", + "data": ["prompt_tokens=8,output_tokens=8"], + "profile": "sweep", + "rate": 5, + "per_constraints": {"max_seconds": [5, 10, 15, 15, 20], "max_requests": [100, 200, 200, 400, 400]}, + } + scenario_path.write_text(json.dumps(scenario_content)) + + args = BenchmarkGenerativeTextArgs.create(scenario=scenario_path) + + assert args.per_constraints == {"max_seconds": [5, 10, 15, 15, 20], "max_requests": [100, 200, 200, 400, 400]} + + +@pytest.mark.smoke +@pytest.mark.asyncio +async def test_resolve_profile_rejects_null_per_constraints(): + with pytest.raises(ValueError, match="Per-strategy constraints for 'max_seconds' contain null values, which are not allowed."): + await resolve_profile( + profile="sweep", + rate=[5], + random_seed=123, + rampup=0.0, + constraints={}, + max_seconds=None, + max_requests=None, + max_errors=None, + max_error_rate=None, + max_global_error_rate=None, + console=None, + per_constraints={"max_seconds": [5, None, 15, 20, 25, 30]}, + ) + + +@pytest.mark.smoke +def test_sweep_profile_rejects_null_per_constraints(): + with pytest.raises(ValueError, match="Per-strategy constraints for 'max_requests' contain null values, which are not allowed."): + SweepProfile( + sweep_size=5, + per_constraints={"max_requests": [100, None, 200, 300, 400]}, + ) diff --git a/tests/unit/benchmark/test_profiles.py b/tests/unit/benchmark/test_profiles.py new file mode 100644 index 000000000..b59394179 --- /dev/null +++ b/tests/unit/benchmark/test_profiles.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +from unittest.mock import Mock + +import pytest + +from guidellm.benchmark.profiles import ( + AsyncConstantStrategy, + Profile, + SweepProfile, + SynchronousStrategy, + ThroughputStrategy, +) +from guidellm.scheduler import Constraint, ConstraintsInitializerFactory, SchedulingStrategy, MaxDurationConstraint, MaxNumberConstraint + + +def test_sweep_profile_strategies_generator_adaptive_rates(): + """ + Tests that the SweepProfile strategies_generator yields the correct sequence of + strategies with adaptively calculated rates. + """ + # 1. Initialize SweepProfile + profile = SweepProfile(sweep_size=4, strategy_type="constant", max_concurrency=16) + generator = profile.strategies_generator() + + # 2. First step should be SynchronousStrategy + strategy, constraints = next(generator) + assert isinstance(strategy, SynchronousStrategy) + + # 3. Send mock benchmark result for the synchronous run + mock_sync_benchmark = Mock() + mock_sync_benchmark.request_throughput.successful.mean = 50.0 + strategy, constraints = generator.send(mock_sync_benchmark) + + # 4. 
Second step should be ThroughputStrategy + assert isinstance(strategy, ThroughputStrategy) + assert strategy.max_concurrency == 16 + + # 5. Send mock benchmark result for the throughput run + mock_throughput_benchmark = Mock() + mock_throughput_benchmark.request_throughput.successful.mean = 200.0 + strategy, constraints = generator.send(mock_throughput_benchmark) + + # The profile should now have calculated the rates for the async strategies. + # np.linspace(50, 200, 3) -> [50., 125., 200.]. After slicing [1:], it's [125., 200.] + assert profile.measured_rates == [125.0, 200.0] + + # 6. Third step should be AsyncConstantStrategy with the first calculated rate + assert isinstance(strategy, AsyncConstantStrategy) + assert strategy.rate == 125.0 + assert strategy.max_concurrency == 16 + + # 7. Send a dummy benchmark result + mock_async_benchmark_1 = Mock() + strategy, constraints = generator.send(mock_async_benchmark_1) + + # 8. Fourth step should be AsyncConstantStrategy with the second calculated rate + assert isinstance(strategy, AsyncConstantStrategy) + assert strategy.rate == 200.0 + assert strategy.max_concurrency == 16 + + # 9. Send the final dummy benchmark, expecting the generator to stop + mock_async_benchmark_2 = Mock() + with pytest.raises(StopIteration): + generator.send(mock_async_benchmark_2) + + +def test_sweep_profile_strategy_constraints(): + """ + Tests that the SweepProfile applies both shared and per-strategy constraints + correctly at each step of the strategy generation process. + """ + # 1. Initialize SweepProfile with both shared and per-strategy constraints. + # `max_duration` is shared across all steps. + # `max_requests` has a specific value for each step. + # `max_errors` is specified for some steps and disabled (None) for others. + profile = SweepProfile( + sweep_size=5, + strategy_type="constant", + rate=[1.0], # Dummy rate, not directly used by constraints test + max_duration=60, + per_constraints={ + "max_requests": [10, 100, 200, 300, 400], + "max_errors": [1, 5, 10, 15, 20], + }, + ) + + # 2. Verify that constraints were parsed and separated correctly. + assert profile.per_constraints == { + "max_requests": [10, 100, 200, 300, 400], + "max_errors": [1, 5, 10, 15, 20], + } + + generator = profile.strategies_generator() + mock_benchmark = Mock() + mock_benchmark.request_throughput.successful.mean = 50.0 + + # 3. Test Step 1: Synchronous Strategy + strategy, constraints_dict = next(generator) + assert isinstance(strategy, SynchronousStrategy) + assert constraints_dict["max_requests"].max_num == 10 + + # 4. Test Step 2: Throughput Strategy + strategy, constraints_dict = generator.send(mock_benchmark) + assert isinstance(strategy, ThroughputStrategy) + assert constraints_dict["max_requests"].max_num == 100 + assert constraints_dict["max_errors"].max_errors == 5 + + # 5. Test Step 3, 4, 5: Async Strategies + expected_async_max_requests = [200, 300, 400] + expected_async_max_errors = [10, 15, 20] + for i in range(3): + strategy, constraints_dict = generator.send(mock_benchmark) + assert isinstance(strategy, AsyncConstantStrategy) + assert constraints_dict is not None + # Check shared and per-strategy constraints for this async step + assert constraints_dict["max_requests"].max_num == expected_async_max_requests[i] + if expected_async_max_errors[i] is not None: + assert "max_errors" in constraints_dict + assert constraints_dict["max_errors"].max_errors == expected_async_max_errors[i] + else: + assert "max_errors" not in constraints_dict + + # 6. 
Expect StopIteration after the last step + with pytest.raises(StopIteration): + generator.send(mock_benchmark) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 134f5531b..8cf2db4c3 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -1,6 +1,6 @@ import json from pathlib import Path -from unittest.mock import patch +from unittest.mock import patch, AsyncMock import pytest from click.testing import CliRunner @@ -83,3 +83,35 @@ def test_cli_backend_args_header_removal(mock_benchmark_func, tmp_path: Path): backend_args = scenario.backend_kwargs expected_headers = {"Authorization": None, "Custom-Header": "Custom-Value"} assert backend_args["headers"] == expected_headers + +@patch("guidellm.__main__.benchmark_generative_text", new_callable=AsyncMock) +def test_cli_passes_per_constraints(mock_benchmark_func): + runner = CliRunner() + result = runner.invoke( + cli, + [ + "benchmark", + "run", + "--target", + "http://localhost:9", + "--data", + "prompt_tokens=1,output_tokens=1", + "--profile", + "sweep", + "--rate", + "5", + "--max-requests", + "1", + "--per-constraints", + '{"max_seconds":[5,10,15,15,20], "max_requests":[100,200,200,400,400]}', + ], + catch_exceptions=False, + ) + + assert result.exit_code == 0, result.output + mock_benchmark_func.assert_called_once() + args = mock_benchmark_func.call_args.kwargs["args"] + assert args.per_constraints == { + "max_seconds": [5,10,15,15,20], + "max_requests": [100,200,200,400,400], + }
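
Below is a minimal, standalone sketch of the per-stage merge semantics that `SweepProfile.next_strategy_constraints` implements in this patch. The function name `merge_stage_constraints` is illustrative and not part of guidellm, and the real code passes the merged dict through `ConstraintsInitializerFactory.resolve` rather than returning it directly:

```python
from typing import Any


def merge_stage_constraints(
    shared: dict[str, Any],
    per_constraints: dict[str, list[Any]],
    stage_index: int,
) -> dict[str, Any]:
    """Merge shared constraints with the per-strategy values for one sweep stage."""
    merged = dict(shared)
    for key, values in per_constraints.items():
        value = values[stage_index]
        if value is None:
            # Mirrors the None branch in next_strategy_constraints: a null entry
            # drops the shared constraint for this stage. The validators added in
            # this patch reject null entries up front, so this path is defensive.
            merged.pop(key, None)
        else:
            merged[key] = value
    return merged


if __name__ == "__main__":
    shared = {"max_seconds": 30, "max_requests": 100}
    per_constraints = {"max_seconds": [5, 10, 15]}
    for stage in range(3):
        print(stage, merge_stage_constraints(shared, per_constraints, stage))
    # 0 {'max_seconds': 5, 'max_requests': 100}
    # 1 {'max_seconds': 10, 'max_requests': 100}
    # 2 {'max_seconds': 15, 'max_requests': 100}
```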