626 changes: 626 additions & 0 deletions 0001-feature-add-per_constraints-unit_test-and-docs.patch

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions docs/getting-started/benchmark.md
@@ -62,6 +62,40 @@ GuideLLM supports several benchmark profiles and strategies:
- `poisson`: Sends requests following a Poisson distribution
- `sweep`: Automatically determines optimal performance points (default)

### Per-Strategy Constraints in `sweep`

Sweep benchmarks execute a fixed sequence of strategies (synchronous → throughput → async rates), so you can set limits for each stage with `per_constraints`. Two entry points support it:

#### CLI Example

```bash
guidellm benchmark run \
--profile sweep \
--rate 5 \
--target "http://localhost:8000" \
--data "prompt_tokens=256,output_tokens=128" \
--per-constraints '{"max_seconds": [5, 10, 15, 15, 20], "max_requests": [100, 200, 200, 400, 400]}'
```

Values are applied in order across the sweep strategies, as the mapping below illustrates. Providing `--per-constraints` with any non-sweep profile raises a validation error to prevent accidental misuse.
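
Assuming `--rate 5` yields a five-stage sweep (synchronous, throughput, then three interpolated async rates), the example above maps as:

- Stage 1 (synchronous): `max_seconds=5`, `max_requests=100`
- Stage 2 (throughput): `max_seconds=10`, `max_requests=200`
- Stages 3–5 (interpolated async rates): `max_seconds=15/15/20`, `max_requests=200/400/400`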

#### Scenario Example

```json
{
"target": "http://localhost:8000",
"data": ["prompt_tokens=256,output_tokens=128"],
"profile": "sweep",
"rate": 5,
"per_constraints": {
"max_seconds": [5,10,15,15,20],
"max_requests": [100, 200, 200, 400, 400]
}
}
```

Running `guidellm benchmark run --scenario my_sweep.json` automatically applies these per-strategy settings without additional CLI flags.
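
The scenario can also be loaded programmatically. A minimal sketch based on this PR's unit tests (the file name is an assumption):

```python
from pathlib import Path

from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs

# per_constraints is checked at validation time, so null entries in the
# per-strategy lists are rejected as soon as the args are created.
args = BenchmarkGenerativeTextArgs.create(scenario=Path("my_sweep.json"))
assert args.per_constraints["max_seconds"] == [5, 10, 15, 15, 20]
```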

### Data Options

For synthetic data, key options include:
7 changes: 7 additions & 0 deletions src/guidellm/__main__.py
@@ -404,6 +404,13 @@ def benchmark():
flag_value='{"enabled": true}',
help="Enable over-saturation detection with default settings.",
)
@click.option(
"--per-constraints",
callback=cli_tools.parse_json,
default=None,
help=(
"Per-strategy constraints for sweep mode. "
'JSON format: {"constraint_name": [value1, value2, ...]}'
),
)
def run(**kwargs): # noqa: C901
# Only set CLI args that differ from click defaults
kwargs = cli_tools.set_if_not_default(click.get_current_context(), **kwargs)
23 changes: 23 additions & 0 deletions src/guidellm/benchmark/entrypoints.py
@@ -325,6 +325,8 @@ async def resolve_profile(
max_global_error_rate: float | None,
over_saturation: dict[str, Any] | None = None,
console: Console | None = None,
per_constraints: dict[str, Any] | None = None,
**kwargs: Any,
) -> Profile:
"""
Resolve and configure a benchmark profile with rate and constraint settings.
@@ -346,6 +348,7 @@
:param max_global_error_rate: Maximum global error rate threshold before stopping
:param over_saturation: Over-saturation detection configuration (dict)
:param console: Console instance for progress reporting, or None
:param per_constraints: Per-strategy constraints (sweep profile only)
:return: Configured Profile instance ready for benchmarking
:raises ValueError: If constraints are provided with a pre-configured Profile
"""
@@ -367,18 +370,37 @@
constraints[key] = val

if not isinstance(profile, Profile):
supported_per_constraints = {"sweep"}
profile_kwargs = dict(kwargs)
if per_constraints:
profile_type = str(profile)
if profile_type not in supported_per_constraints:
raise ValueError(
"Per-strategy constraints are only supported with the 'sweep' profile."
)
# Validate no nulls in per_constraints lists
for key, val_list in per_constraints.items():
if isinstance(val_list, list) and any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)
profile_kwargs["per_constraints"] = per_constraints

profile = Profile.create(
rate_type=profile,
rate=rate,
random_seed=random_seed,
rampup_duration=rampup,
constraints={**constraints},
**profile_kwargs,
)
elif constraints:
raise ValueError(
"Constraints must be empty when providing a Profile instance. "
f"Provided constraints: {constraints} ; provided profile: {profile}"
)
elif per_constraints:
raise ValueError(
"Per-strategy constraints cannot be applied when providing a Profile instance."
)
elif rampup > 0.0:
raise ValueError(
"Ramp-up duration must not be set when providing a Profile instance. "
@@ -505,6 +527,7 @@ async def benchmark_generative_text(
max_global_error_rate=args.max_global_error_rate,
over_saturation=args.over_saturation,
console=console,
per_constraints=args.per_constraints,
)
output_formats = await resolve_output_formats(
outputs=args.outputs, output_dir=args.output_dir, console=console
69 changes: 69 additions & 0 deletions src/guidellm/benchmark/profiles.py
@@ -609,6 +609,34 @@ class SweepProfile(Profile):
default_factory=list,
description="Interpolated rates between synchronous and throughput",
)
per_constraints: dict[str, list[Any]] | None = Field(
default=None,
description="Per-strategy constraint values, applied in order across the sweep's strategies (sweep profile only)",
)

@field_validator("per_constraints", mode="before")
@classmethod
def validate_per_constraints(cls, value: Any) -> dict[str, list[Any]] | None:
"""
Validate that per_constraints doesn't contain null values in the lists.

:param value: Input value for per_constraints field
:return: Validated per_constraints dictionary
"""
if value is None:
return None

if not isinstance(value, dict):
return value

for key, val_list in value.items():
if not isinstance(val_list, list):
continue

if any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)

return value

@classmethod
def resolve_args(
@@ -632,7 +660,48 @@
kwargs["random_seed"] = random_seed
if rate_type in ["constant", "poisson"]:
kwargs["strategy_type"] = rate_type
if "per_constraints" in kwargs:
# Already in the correct format, keep it
pass
elif "constraints" in kwargs:
# Backward compatibility: split into per-strategy and shared constraints
constraints = kwargs["constraints"]
if isinstance(constraints, dict):
shared_constraints = {}
per_constraints = {}
for key, val in constraints.items():
if isinstance(val, list):
per_constraints[key] = val
else:
shared_constraints[key] = val
kwargs["constraints"] = shared_constraints or None
kwargs["per_constraints"] = per_constraints or None
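# Example (sketch): constraints={"max_seconds": [5, 10], "max_requests": 100}
# splits into per_constraints={"max_seconds": [5, 10]} and
# constraints={"max_requests": 100} for backward compatibility.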
return kwargs

def next_strategy_constraints(
self,
next_strategy: SchedulingStrategy | None,
prev_strategy: SchedulingStrategy | None,
prev_benchmark: Benchmark | None,
) -> dict[str, Constraint] | None:
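"""
Merge shared constraints with the per-strategy values for the next stage.

Sketch of the intended behavior (mirrors this PR's unit tests): with
constraints={"max_requests": 100} and per_constraints={"max_seconds": [5, 10, 15]},
the first strategy resolves {"max_seconds": 5, "max_requests": 100},
the second {"max_seconds": 10, "max_requests": 100}, and so on.
"""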
if not next_strategy:
return None

current_index = len(self.completed_strategies)
final_constraints: dict[str, Any] = dict(self.constraints or {})

if self.per_constraints and 0 <= current_index < self.sweep_size:
for key, val in self.per_constraints.items():
# Guard against per-strategy lists shorter than the sweep size
if current_index >= len(val):
continue
constraint_val = val[current_index]
if constraint_val is None:
# Defensive: the validators reject nulls, but a null here
# removes the shared constraint for this stage
final_constraints.pop(key, None)
else:
final_constraints[key] = constraint_val

return (
ConstraintsInitializerFactory.resolve(final_constraints)
if final_constraints
else None
)

@property
def strategy_types(self) -> list[str]:
25 changes: 25 additions & 0 deletions src/guidellm/benchmark/schemas/generative/entrypoints.py
@@ -291,6 +291,31 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
"moe_threshold, etc.)."
),
)
per_constraints: dict[str, Any] | None = Field(
default=None,
description="Per-strategy constraints applied in order across the sweep profile's strategies",
)

@field_validator("per_constraints", mode="before")
@classmethod
def validate_per_constraints(cls, value: Any) -> dict[str, Any] | None:
"""
Validate that per_constraints doesn't contain null values in the lists.

:param value: Input value for per_constraints field
:return: Validated per_constraints dictionary
"""
if value is None:
return None

if not isinstance(value, dict):
return value

for key, val_list in value.items():
if isinstance(val_list, list) and any(item is None for item in val_list):
raise ValueError(
f"Per-strategy constraints for '{key}' contain null "
"values, which are not allowed."
)

return value

@field_validator("data", "data_args", "rate", mode="wrap")
@classmethod
157 changes: 157 additions & 0 deletions tests/unit/benchmark/test_per_constraints.py
@@ -0,0 +1,157 @@
import json
from pathlib import Path

import pytest

from guidellm.benchmark.entrypoints import resolve_profile
from guidellm.benchmark.profiles import SweepProfile, SynchronousProfile
from guidellm.benchmark.schemas import BenchmarkGenerativeTextArgs
from guidellm.scheduler import (
AsyncConstantStrategy,
ConstraintsInitializerFactory,
SynchronousStrategy,
ThroughputStrategy,
)

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_allows_per_constraints_for_sweep():
profile = await resolve_profile(
profile="sweep",
rate=[5],
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1, 2, 3, 4, 5]},
)

assert isinstance(profile, SweepProfile)
assert profile.per_constraints == {"max_seconds": [1, 2, 3, 4, 5]}

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_per_constraints_for_non_sweep():
with pytest.raises(
ValueError,
match="Per-strategy constraints are only supported with the 'sweep' profile.",
):
await resolve_profile(
profile="synchronous",
rate=None,
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1]},
)

@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_per_constraints_for_instances():
synchronous_profile = SynchronousProfile()

with pytest.raises(
ValueError, match="Per-strategy constraints cannot be applied"
):
await resolve_profile(
profile=synchronous_profile,
rate=None,
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [1]},
)

@pytest.mark.smoke
def test_sweep_profile_applies_per_constraints_sequence(monkeypatch):
captured: list[dict[str, int]] = []

def fake_resolve(value):
captured.append(value)
return value

monkeypatch.setattr(
ConstraintsInitializerFactory, "resolve", staticmethod(fake_resolve)
)

profile = SweepProfile(
sweep_size=3,
per_constraints={"max_seconds": [5, 10, 15]},
constraints={"max_seconds": 30, "max_requests": 100},
)

sync = SynchronousStrategy()
profile.next_strategy_constraints(sync, None, None)
assert captured[-1]["max_seconds"] == 5
assert captured[-1]["max_requests"] == 100

profile.completed_strategies.append(sync)
throughput = ThroughputStrategy(max_concurrency=1, rampup_duration=0.0)
profile.next_strategy_constraints(throughput, sync, None)
assert captured[-1]["max_seconds"] == 10
assert captured[-1]["max_requests"] == 100

profile.completed_strategies.append(throughput)
async_strategy = AsyncConstantStrategy(rate=1.0, max_concurrency=None)
profile.next_strategy_constraints(async_strategy, throughput, None)
assert captured[-1]["max_seconds"] == 15
assert captured[-1]["max_requests"] == 100

@pytest.mark.smoke
def test_benchmark_args_accept_per_constraints_from_scenario(tmp_path: Path):
scenario_path = tmp_path / "scenario.json"
scenario_content = {
"target": "http://localhost:9000",
"data": ["prompt_tokens=8,output_tokens=8"],
"profile": "sweep",
"rate": 5,
"per_constraints": {
"max_seconds": [5, 10, 15, 15, 20],
"max_requests": [100, 200, 200, 400, 400],
},
}
scenario_path.write_text(json.dumps(scenario_content))

args = BenchmarkGenerativeTextArgs.create(scenario=scenario_path)

assert args.per_constraints == {
"max_seconds": [5, 10, 15, 15, 20],
"max_requests": [100, 200, 200, 400, 400],
}


@pytest.mark.smoke
@pytest.mark.asyncio
async def test_resolve_profile_rejects_null_per_constraints():
with pytest.raises(
ValueError,
match=(
"Per-strategy constraints for 'max_seconds' contain null values, "
"which are not allowed."
),
):
await resolve_profile(
profile="sweep",
rate=[5],
random_seed=123,
rampup=0.0,
constraints={},
max_seconds=None,
max_requests=None,
max_errors=None,
max_error_rate=None,
max_global_error_rate=None,
console=None,
per_constraints={"max_seconds": [5, None, 15, 20, 25, 30]},
)


@pytest.mark.smoke
def test_sweep_profile_rejects_null_per_constraints():
with pytest.raises(
ValueError,
match=(
"Per-strategy constraints for 'max_requests' contain null values, "
"which are not allowed."
),
):
SweepProfile(
sweep_size=5,
per_constraints={"max_requests": [100, None, 200, 300, 400]},
)