From 009249468c87ef064bda3a8592a5ff23c76bb454 Mon Sep 17 00:00:00 2001
From: Shrey Modi
Date: Mon, 17 Nov 2025 20:58:28 +0000
Subject: [PATCH 01/10] openenvrolloutprocessor

---
 tests/pytest/test_openenv_browsergym_basic.py |  83 +++++
 tests/pytest/test_openenv_browsergym_eval.py  | 288 ++++++++++++++++++
 tests/pytest/test_openenv_echo_hub.py         | 109 +++++++
 3 files changed, 480 insertions(+)
 create mode 100644 tests/pytest/test_openenv_browsergym_basic.py
 create mode 100644 tests/pytest/test_openenv_browsergym_eval.py
 create mode 100644 tests/pytest/test_openenv_echo_hub.py

diff --git a/tests/pytest/test_openenv_browsergym_basic.py b/tests/pytest/test_openenv_browsergym_basic.py
new file mode 100644
index 00000000..75de7643
--- /dev/null
+++ b/tests/pytest/test_openenv_browsergym_basic.py
@@ -0,0 +1,83 @@
+import asyncio
+import os
+import shutil
+from typing import Any, Dict, List
+
+import pytest
+
+from eval_protocol.models import EvaluationRow, Message
+from eval_protocol.pytest.types import RolloutProcessorConfig
+from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor
+
+# Skip these integration-heavy tests on CI runners by default
+pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI")
+
+
+@pytest.mark.integration
+def test_openenv_browsergym_basic():
+    """
+    Very basic integration test to ensure OpenEnv + BrowserGym can run a single-step rollout.
+    Skips automatically if Docker is not available.
+    """
+    if shutil.which("docker") is None:
+        pytest.skip("Docker not available on PATH; skipping OpenEnv BrowserGym basic test.")
+
+    # Build a minimal EvaluationRow (messages can be empty; processor will add user prompts)
+    rows: List[EvaluationRow] = [EvaluationRow(messages=[Message(role="user", content="start")])]
+
+    # Use tasks that are known to exist; requires MiniWoB server reachable from containers.
+    tasks = ["click-test"]
+    miniwob_url = os.getenv("MINIWOB_URL", "http://172.17.0.1:8888/miniwob/")
+
+    # Construct the processor with a trivial action_parser; the model output will still be generated
+    # but we parse to a safe noop action to minimize flakiness for the environment step.
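+    # BrowserGymAction below is assumed to wrap a raw BrowserGym action string that the
+    # environment executes verbatim; "noop()" should leave the page unchanged, so the single
+    # env.step() in this test is unlikely to fail because of a malformed action.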
+    from envs.browsergym_env import BrowserGymAction  # type: ignore
+    processor = OpenEnvRolloutProcessor(
+        env_factory=None,
+        prompt_builder=lambda obs, step, history: "Do nothing",
+        action_parser=lambda text: BrowserGymAction(action_str="noop()"),
+        tasks=tasks,
+        miniwob_url=miniwob_url,
+        docker_image="browsergym-env:latest",
+        benchmark="miniwob",
+        timeout_ms=10000,
+        num_generations=1,
+    )
+
+    # Completion params: rely on an available provider/model in the environment
+    completion_params: Dict[str, Any] = {
+        "model": os.getenv(
+            "OPENENV_TEST_MODEL",
+            # Default to a Fireworks public model id used elsewhere in tests; requires FIREWORKS_API_KEY
+            "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct",
+        ),
+        "temperature": 0.0,
+        "max_tokens": 16,
+    }
+
+    # Limit to a single step to keep the test fast and robust
+    config = RolloutProcessorConfig(
+        completion_params=completion_params,
+        semaphore=asyncio.Semaphore(1),
+        steps=1,
+    )
+
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        async def _run_all():
+            tasks_ = processor(rows, config)
+            return await asyncio.gather(*tasks_)
+
+        completed_rows = loop.run_until_complete(_run_all())
+    finally:
+        loop.close()
+
+    assert len(completed_rows) == 1
+    # Basic sanity checks that a rollout happened and usage is populated
+    row = completed_rows[0]
+    assert row is not None
+    assert row.execution_metadata is not None
+    assert getattr(row.execution_metadata, "duration_seconds", 0.0) >= 0.0
+
diff --git a/tests/pytest/test_openenv_browsergym_eval.py b/tests/pytest/test_openenv_browsergym_eval.py
new file mode 100644
index 00000000..0de81e20
--- /dev/null
+++ b/tests/pytest/test_openenv_browsergym_eval.py
@@ -0,0 +1,288 @@
+from typing import Any, Dict, List
+import os
+import re
+
+import pytest
+from eval_protocol.models import EvaluationRow, Message, EvaluateResult
+from eval_protocol.pytest import evaluation_test
+from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor
+import pytest
+
+# Skip these integration-heavy tests on CI runners by default
+pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI")
+
+
+def openenv_dataset_to_rows(data: List[Dict[str, Any]]) -> List[EvaluationRow]:
+    """
+    Adapter: convert simple {"id": "...", "prompt": "..."} rows into EvaluationRows.
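+
+    For example, a dataset line of {"id": "bg-0", "prompt": "start"} (hypothetical values)
+    becomes EvaluationRow(messages=[Message(role="user", content="start")]); the "id" field
+    is ignored by this adapter.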
+    """
+    rows: List[EvaluationRow] = []
+    for row in data:
+        prompt = str(row.get("prompt", "start"))
+        rows.append(EvaluationRow(messages=[Message(role="user", content=prompt)]))
+    return rows
+
+
+# ---- prompt_builder and action_parser modeled after browsergym_grpo_evalp.py ----
+
+ACTION_PATTERN = re.compile(r"[A-Za-z_]+\s*\(.*\)", re.DOTALL)
+
+
+def _as_scalar(x: Any) -> Any:
+    try:
+        return x.item()
+    except Exception:
+        return x
+
+
+def _extract_goal_url_title(observation: Any) -> tuple[str, str, str]:
+    goal = getattr(observation, "goal", "") or ""
+    url = getattr(observation, "url", "") or ""
+    title = ""
+    metadata = getattr(observation, "metadata", {}) or {}
+    obs_dict = metadata.get("browsergym_obs", {}) or {}
+    if not goal:
+        goal = obs_dict.get("goal") or ""
+    if not url:
+        url = obs_dict.get("url") or ""
+    titles = obs_dict.get("open_pages_titles") or ()
+    active_idx = _as_scalar(obs_dict.get("active_page_index"))
+    try:
+        active_idx = int(active_idx)
+    except Exception:
+        active_idx = 0
+    if isinstance(titles, (list, tuple)) and 0 <= active_idx < len(titles):
+        title = titles[active_idx] or ""
+    return goal, url, title
+
+
+def _extract_clickable_elements_lines(observation: Any) -> List[str]:
+    metadata = getattr(observation, "metadata", {}) or {}
+    obs_dict = metadata.get("browsergym_obs", {}) or {}
+    extra_props = obs_dict.get("extra_element_properties", {}) or {}
+    axtree_object = obs_dict.get("axtree_object") or {}
+    focused_bid = obs_dict.get("focused_element_bid")
+    bid_to_desc: Dict[str, tuple[str, str]] = {}
+    try:
+        nodes = axtree_object.get("nodes") or []
+        for node in nodes:
+            bid = node.get("browsergym_id")
+            if bid is None:
+                continue
+            role = ""
+            name = ""
+            rf = node.get("role") or {}
+            if isinstance(rf, dict):
+                role = str(rf.get("value", "")).strip()
+            nf = node.get("name") or {}
+            if isinstance(nf, dict):
+                name = str(nf.get("value", "")).strip()
+            bid_to_desc[str(bid)] = (role, name)
+    except Exception:
+        pass
+    lines: List[str] = []
+    for bid in sorted(extra_props.keys(), key=lambda x: str(x)):
+        props = extra_props[bid] or {}
+        if not props.get("clickable"):
+            continue
+        bbox = props.get("bbox") or []
+        bbox_str = ", ".join(str(v) for v in bbox) if bbox else "?"
+        role, name = bid_to_desc.get(str(bid), ("", ""))
+        focus_tag = " [FOCUSED]" if (str(bid) == str(focused_bid)) else ""
+        rn = (role or "-")
+        if name:
+            rn = f"{rn} | {name}"
+        vis = props.get("visibility")
+        vis_str = f"{vis:.2f}" if isinstance(vis, (int, float)) else str(vis) if vis is not None else "?"
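+        # Example of one emitted line (values illustrative only):
+        #   "- BID 12 [FOCUSED]: button | OK | bbox(10, 20, 80, 24) | visibility=1.00"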
+ lines.append(f"- BID {bid}{focus_tag}: {rn} | bbox({bbox_str}) | visibility={vis_str}") + return lines + + +def _rank_clickables_lines(observation: Any, goal: str, top_n: int = 8) -> tuple[List[str], str | None]: + metadata = getattr(observation, "metadata", {}) or {} + obs_dict = metadata.get("browsergym_obs", {}) or {} + goal_lc = (goal or "").lower().strip() + extra_props = obs_dict.get("extra_element_properties", {}) or {} + axtree_object = obs_dict.get("axtree_object") or {} + focused_bid = str(obs_dict.get("focused_element_bid") or "") + bid_to_desc: Dict[str, tuple[str, str]] = {} + try: + nodes = axtree_object.get("nodes") or [] + for node in nodes: + bid = node.get("browsergym_id") + if bid is None: + continue + role = "" + name = "" + rf = node.get("role") or {} + if isinstance(rf, dict): + role = str(rf.get("value", "")).strip() + nf = node.get("name") or {} + if isinstance(nf, dict): + name = str(nf.get("value", "")).strip() + bid_to_desc[str(bid)] = (role, name) + except Exception: + pass + scored: List[tuple[float, str, str, str, str]] = [] + for bid_key in sorted(extra_props.keys(), key=lambda x: str(x)): + props = extra_props[bid_key] or {} + if not props.get("clickable"): + continue + role, name = bid_to_desc.get(str(bid_key), ("", "")) + name_lc = (name or "").lower() + score = 0.0 + if goal_lc and name_lc and (goal_lc in name_lc or name_lc in goal_lc): + score += 2.0 + if (role or "").lower() == "button": + score += 1.0 + if str(bid_key) == focused_bid: + score += 0.5 + vis = props.get("visibility") + try: + vis_f = float(vis) + score += max(0.0, min(1.0, vis_f)) + except Exception: + pass + bbox = props.get("bbox") or [] + bbox_str = ", ".join(str(v) for v in bbox) if bbox else "?" + rn = (role or "-") + if name: + rn = f"{rn} | {name}" + vis_str = f"{vis:.2f}" if isinstance(vis, (int, float)) else str(vis) if vis is not None else "?" + scored.append((score, str(bid_key), rn, bbox_str, vis_str)) + scored.sort(key=lambda t: t[0], reverse=True) + lines: List[str] = [] + recommended = scored[0][1] if scored else None + for idx, (score, bid, rn, bbox_str, vis_str) in enumerate(scored[:top_n], start=1): + lines.append(f"{idx}. 
BID {bid}: score={score:.2f} | {rn} | bbox({bbox_str}) | visibility={vis_str}") + return lines, recommended + + +def prompt_builder(observation: Any, step: int, history: List[str]) -> str: + goal, url, title = _extract_goal_url_title(observation) + url = url or "(unknown)" + error_note = "Yes" if getattr(observation, "last_action_error", False) else "No" + clickables_block = "\n".join(_extract_clickable_elements_lines(observation)) or "(none detected)" + ranked_lines, rec = _rank_clickables_lines(observation, goal, top_n=10) + ranked_block = "\n".join(ranked_lines) or "(none)" + text = getattr(observation, "text", "") or "" + text = text[:2048] + metadata = getattr(observation, "metadata", {}) or {} + obs_dict = metadata.get("browsergym_obs", {}) or {} + focused_bid = obs_dict.get("focused_element_bid") or "" + last_action = obs_dict.get("last_action") or "" + return ( + f"Step: {step}\n" + f"Goal: {goal}\n" + f"Current URL: {url}\n" + f"Title: {title}\n" + f"Previous steps:\n" + ("\n".join(history[-4:]) if history else "None") + "\n" + f"Last action: {last_action}\n" + f"Last action error: {error_note}\n" + f"Focused BID: {focused_bid}\n\n" + f"Clickable elements (BID: role | name | bbox | visibility):\n{clickables_block}\n\n" + f"Ranked clickable candidates (best first):\n{ranked_block}\n" + f"Recommended BID: {rec or '(none)'}\n\n" + "Instructions:\n" + "- Choose the most relevant clickable BID to achieve the goal.\n" + "- Prefer role=button or elements whose name matches the goal.\n" + "- Reply with a single action, e.g., click('13') or noop().\n\n" + f"Page excerpt:\n{text}\n\n" + "Reply with exactly one BrowserGym action string." + ).strip() + + +def action_parser(response_text: str): + try: + from envs.browsergym_env import BrowserGymAction # type: ignore + except Exception: + pytest.skip("OpenEnv (envs.browsergym_env) is not installed; skipping BrowserGym test.") + raise + if not response_text: + return BrowserGymAction(action_str="noop()") + for raw in response_text.splitlines(): + line = raw.strip() + if not line: + continue + m = ACTION_PATTERN.search(line) + if m: + parsed = re.sub(r"\s+", " ", m.group(0)) + return BrowserGymAction(action_str=parsed) + m = ACTION_PATTERN.search(response_text) + if m: + parsed = re.sub(r"\s+", " ", m.group(0)) + return BrowserGymAction(action_str=parsed) + return BrowserGymAction(action_str="noop()") + + +try: + from envs.browsergym_env import BrowserGymEnv # type: ignore + _HAS_BG = True +except Exception: + _HAS_BG = False + + +@evaluation_test( # type: ignore[misc] + input_dataset=["tests/pytest/data/openenv_browsergym_dataset.jsonl"], + dataset_adapter=openenv_dataset_to_rows, + completion_params=[ + { + "temperature": 0.0, + "max_tokens": 32, + "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + } + ], + # Keep concurrency and steps low for a quick health-check + num_runs=1, + max_concurrent_rollouts=1, + mode="pointwise", + rollout_processor=( + OpenEnvRolloutProcessor( + env_client_cls=BrowserGymEnv if _HAS_BG else None, + prompt_builder=prompt_builder, + action_parser=action_parser, + tasks=[ + "click-test", + "click-button", + "click-button-sequence", + "click-checkboxes", + "click-checkboxes-soft", + "click-checkboxes-large", + "click-checkboxes-transfer", + ], + miniwob_url=os.getenv("MINIWOB_URL", "http://172.17.0.1:8888/miniwob/"), + docker_image="browsergym-env:latest", + benchmark="miniwob", + timeout_ms=10000, + num_generations=1, + ) + if _HAS_BG + else None + ), +) +def test_openenv_browsergym_eval(row: 
EvaluationRow) -> EvaluationRow: + """ + Smoke test to ensure OpenEnv + BrowserGym MiniWoB runs and returns a row. + The evaluation harness will assert basic invariants (no exceptions, etc.). + """ + if not _HAS_BG: + pytest.skip("OpenEnv (envs.browsergym_env) is not installed; skipping BrowserGym test.") + # Extract step rewards from the sentinel system message injected by the rollout processor + step_rewards: List[float] = [] + try: + for msg in row.messages or []: + if msg.role == "system" and isinstance(msg.content, str) and msg.content.startswith("__ep_step_rewards__:"): + import json as _json + payload = msg.content.split(":", 1)[1] + step_rewards = _json.loads(payload) or [] + break + except Exception: + step_rewards = [] + + total = float(sum(step_rewards)) if step_rewards else 0.0 + # Map total reward to a score in [0,1]; MiniWoB rewards are typically 0/1 or -1/1 + score = max(0.0, min(1.0, total)) + reason = f"Total reward={total:.2f} across {len(step_rewards)} steps" + row.evaluation_result = EvaluateResult(score=score, reason=reason) + return row + diff --git a/tests/pytest/test_openenv_echo_hub.py b/tests/pytest/test_openenv_echo_hub.py new file mode 100644 index 00000000..7ddd2b8c --- /dev/null +++ b/tests/pytest/test_openenv_echo_hub.py @@ -0,0 +1,109 @@ +from typing import Any, Dict, List +import os +import re + +from eval_protocol.models import EvaluationRow, Message, EvaluateResult +from eval_protocol.pytest import evaluation_test +from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor +import pytest +import os + +# Skip these integration-heavy tests on CI runners by default +pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") + + +def echo_dataset_to_rows(data: List[Dict[str, Any]]) -> List[EvaluationRow]: + """ + Adapter: simple {"id": "...", "prompt": "..."} to EvaluationRows. + """ + rows: List[EvaluationRow] = [] + for row in data: + prompt = str(row.get("prompt", "hello")) + rows.append(EvaluationRow(messages=[Message(role="user", content=prompt)])) + return rows + + +def prompt_builder(observation: Any, step: int, history: List[str]) -> str: + """ + Echo env is very simple; we just send a short instruction. + """ + return "Please repeat back the next message exactly." + + +def action_parser(response_text: str): + """ + Convert raw model response to EchoAction. 
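+    For example, a response of "hello world" (hypothetical) becomes EchoAction(message="hello world"),
+    and an empty or non-string response falls back to EchoAction(message="hello").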
+ """ + try: + from envs.echo_env import EchoAction # type: ignore + except Exception: + pytest.skip("OpenEnv (envs.echo_env) is not installed; skipping Echo hub test.") + raise + text = response_text.strip() if isinstance(response_text, str) else "" + return EchoAction(message=text or "hello") + + +try: + from envs.echo_env import EchoEnv # type: ignore + _HAS_ECHO = True +except Exception: + _HAS_ECHO = False + + +@evaluation_test( # type: ignore[misc] + input_dataset=["tests/pytest/data/echo_dataset.jsonl"], + dataset_adapter=echo_dataset_to_rows, + completion_params=[ + { + "temperature": 0.0, + "max_tokens": 16, + # Any working model with your API key; match other tests' default + "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + } + ], + num_runs=1, + max_concurrent_rollouts=2, + mode="pointwise", + rollout_processor=( + OpenEnvRolloutProcessor( + # Use HF Hub to launch the environment container automatically + env_client_cls=EchoEnv, # type: ignore + hub_repo_id=os.getenv("OPENENV_ECHO_REPO", "openenv/echo-env"), + # Simple prompt+parser above + prompt_builder=prompt_builder, + action_parser=action_parser, + # Keep defaults for timeouts/viewport/etc. (not relevant for echo) + timeout_ms=5000, + num_generations=1, + ) + if _HAS_ECHO + else None + ), +) +def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: + """ + Smoke test for Echo env via Hugging Face Hub (registry.hf.space/openenv-echo-env). + Extracts env rewards (from rollout policy extras) and sets evaluation_result. + """ + if not _HAS_ECHO: + pytest.skip("OpenEnv (envs.echo_env) is not installed; skipping Echo hub test.") + # Try to read rewards/usage left in execution metadata extra or system messages. + total_reward = 0.0 + try: + # Preferred path: system sentinel "__ep_step_rewards__" + step_rewards: List[float] = [] + for msg in row.messages or []: + if msg.role == "system" and isinstance(msg.content, str) and msg.content.startswith("__ep_step_rewards__:"): + import json as _json + payload = msg.content.split(":", 1)[1] + step_rewards = _json.loads(payload) or [] + break + total_reward = float(sum(step_rewards)) if step_rewards else 0.0 + except Exception: + total_reward = 0.0 + + score = max(0.0, min(1.0, total_reward)) + row.evaluation_result = EvaluateResult(score=score, reason=f"Echo total reward={total_reward:.2f}") + return row + + From ed93cb0b077be6c78e739b887e98afbb5fbf8a12 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Mon, 17 Nov 2025 21:01:41 +0000 Subject: [PATCH 02/10] openenvrolloutprocessor --- .../pytest/openenv_rollout_processor.py | 483 ++++++++++++++++++ 1 file changed, 483 insertions(+) create mode 100644 eval_protocol/pytest/openenv_rollout_processor.py diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py new file mode 100644 index 00000000..fc9c14dd --- /dev/null +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -0,0 +1,483 @@ +""" +OpenEnv Rollout Processor + +Generic processor for ANY OpenEnv environment using the standard HTTPEnvClient interface. +No environment-specific code - works with BrowserGym, Echo, TextArena, Atari, etc. + +Key: OpenEnv provides a standard interface across all environments: +- All environments: HTTPEnvClient[ActionType, ObservationType] +- All have: reset() → StepResult, step(action) → StepResult, state() → State +- Client handles serialization/deserialization + +This processor just calls env.reset(), env.step(), env.state() - that's it! 
+""" + +import asyncio +import logging +import time +from typing import List, Any, Dict, Callable, Generic, TypeVar, Optional, Type +import json + +from openai.types import CompletionUsage + +from eval_protocol.mcp.execution.policy import LiteLLMPolicy +from eval_protocol.models import EvaluationRow, Message +from eval_protocol.pytest.rollout_processor import RolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig + + logger = logging.getLogger(__name__) + + +class OpenEnvRolloutProcessor(RolloutProcessor): + """ + Generic rollout processor for ANY OpenEnv environment. + + Works with any environment that follows OpenEnv's standard interface: + - HTTPEnvClient[ActionType, ObservationType] + - reset() → StepResult[ObservationType] + - step(action: ActionType) → StepResult[ObservationType] + - state() → State + + No environment-specific code - just uses the standard interface! + + Examples: + ```python + # BrowserGym + from envs.browsergym_env import BrowserGymEnv, BrowserGymAction + def make_env(): + return BrowserGymEnv.from_docker_image(...) + + # Echo + from envs.echo_env import EchoEnv, EchoAction + def make_env(): + return EchoEnv.from_docker_image(...) + + # TextArena + from envs.textarena_env import TextArenaEnv, TextArenaAction + def make_env(): + return TextArenaEnv.from_docker_image(...) + + # Same processor works for all! + processor = OpenEnvRolloutProcessor( + env_factory=make_env, + action_parser=lambda text: BrowserGymAction(action_str=text), # or EchoAction(message=text), etc. + ) + ``` + + For TRL integration, see: trl-evalp/openenv_trl_integration.py + """ + + def __init__( + self, + env_factory: Optional[Callable] = None, + prompt_builder: Callable[[Any, int, List[str]], Any] | None = None, + action_parser: Callable[[str], Any] | None = None, + *, + # Environment construction parameters (generic HTTP client or Docker) + env_client_cls: Optional[Type[Any]] = None, + tasks: Optional[List[str]] = None, + miniwob_url: Optional[str] = None, + docker_image: str = "browsergym-env:latest", + env_base_url: Optional[str] = None, + hub_repo_id: Optional[str] = None, + request_timeout_s: float = 15.0, + default_headers: Optional[Dict[str, str]] = None, + provider: Any | None = None, + docker_port: Optional[int] = None, + env_vars: Optional[Dict[str, str]] = None, + benchmark: str = "miniwob", + headless: bool = True, + viewport_width: int = 1280, + viewport_height: int = 720, + timeout_ms: int = 10000, + num_generations: Optional[int] = None, + ): + """ + Initialize processor. + + Args: + env_factory: Optional callable that creates an OpenEnv environment (HTTPEnvClient) + Example: lambda: BrowserGymEnv.from_docker_image(...). If not provided, + the processor will build one using the parameters below. + prompt_builder: Optional function that builds the user message content from + (observation, step, history). It should return content + directly compatible with the LLM client (e.g., a string, + or OpenAI-style content list/dict). No additional processing + is performed by the processor. + action_parser: Function that converts LLM text → Action object + Example: lambda text: BrowserGymAction(action_str=text) + Example: lambda text: EchoAction(message=text) + env_client_cls: Optional environment HTTP client class (generic). + tasks, miniwob_url, docker_image, env_base_url, request_timeout_s, default_headers, + provider, docker_port, env_vars, benchmark, headless, viewport_*, timeout_ms: + Parameters to construct default environments if env_factory is not provided. 
+ num_generations: Optional hint for task rotation grouping (used to mimic GRPO grouping). + """ + self.prompt_builder = prompt_builder or (lambda obs, step, history: str(obs)) + if action_parser is None: + raise ValueError("action_parser must be provided and return an Action object.") + self.action_parser = action_parser + + # Store env construction parameters + self._provided_env_factory = env_factory + self._env_client_cls = env_client_cls + self._tasks = tasks or [] + self._miniwob_url = miniwob_url + self._docker_image = docker_image + self._env_base_url = env_base_url + self._hub_repo_id = hub_repo_id + self._request_timeout_s = request_timeout_s + self._default_headers = default_headers + self._provider = provider + self._docker_port = docker_port + self._env_vars = env_vars or {} + self._benchmark = benchmark + self._headless = headless + self._viewport_width = viewport_width + self._viewport_height = viewport_height + self._timeout_ms = timeout_ms + self._num_generations = max(1, int(num_generations)) if num_generations else 1 + self._env_create_idx: int = 0 + + # Build env_factory if not provided + self.env_factory = self._build_env_factory() + + def __call__( + self, rows: List[EvaluationRow], config: RolloutProcessorConfig + ) -> List[asyncio.Task[EvaluationRow]]: + """Process evaluation rows and return async tasks.""" + + semaphore = config.semaphore + max_steps = config.steps or 8 + + async def process_row(row: EvaluationRow) -> EvaluationRow: + """Process a single row with OpenEnv rollout.""" + start_time = time.perf_counter() + + # Create environment + try: + print("[OpenEnvRolloutProcessor] Creating environment via env_factory() ...") + except Exception: + pass + env = self.env_factory() + try: + print("[OpenEnvRolloutProcessor] Environment client created.") + except Exception: + pass + + try: + # Get model config + raw_model = config.completion_params.get("model", "gpt-4o-mini") + model = raw_model + temperature = config.completion_params.get("temperature", 0.0) + max_tokens = config.completion_params.get("max_tokens", 100) + # Optional: direct routing or provider overrides (e.g., base_url, api_key, top_p, stop, etc.) 
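+                # For illustration, completion_params might look like (values are examples only):
+                #   {"model": "...", "temperature": 0.0, "max_tokens": 16,
+                #    "base_url": "http://localhost:8000/v1", "top_p": 0.9}
+                # Everything other than model/temperature/max_tokens/base_url is forwarded untouched.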
+ base_url = config.completion_params.get("base_url") + # Forward any extra completion params to LiteLLMPolicy (they will be sent per-request) + extra_params: Dict[str, Any] = dict(config.completion_params or {}) + for _k in ("model", "temperature", "max_tokens", "base_url"): + try: + extra_params.pop(_k, None) + except Exception: + pass + try: + print(f"[OpenEnvRolloutProcessor] Model='{model}' temp={temperature} max_tokens={max_tokens} base_url={base_url or '(default)'}") + except Exception: + pass + + # Create policy for generation + policy = LiteLLMPolicy( + model_id=model, + temperature=temperature, + max_tokens=max_tokens, + base_url=base_url, + **extra_params, + ) + + # Reset environment with simple transient-error retries + reset_attempts = 3 + reset_delay = 1.0 + last_exc = None + try: + print("[OpenEnvRolloutProcessor] Resetting environment ...") + except Exception: + pass + for i in range(reset_attempts): + try: + result = env.reset() + try: + print(f"[OpenEnvRolloutProcessor] reset() succeeded on attempt {i + 1}") + except Exception: + pass + break + except Exception as e: + last_exc = e + if i == reset_attempts - 1: + raise + time.sleep(reset_delay) + reset_delay *= 2.0 + observation = result.observation + + + # Initialize tracking + messages = list(row.messages) # Copy initial messages + # Inject system prompt if provided and not already present + try: + has_system = any(m.role == "system" for m in messages) + except Exception: + has_system = False + system_prompt = None + try: + system_prompt = config.completion_params.get("system_prompt") + except Exception: + system_prompt = None + if system_prompt and not has_system: + messages.insert(0, Message(role="system", content=system_prompt)) + usage = { + "prompt_tokens": 0, + "completion_tokens": 0, + "total_tokens": 0, + } + step_rewards = [] + history: List[str] = [] + + # Agent loop: model → action → env.step → repeat + for step in range(max_steps): + if result.done: + logger.info(f"Episode done after {step} steps") + try: + print(f"[OpenEnvRolloutProcessor] Episode already done at step {step}") + except Exception: + pass + break + + # Build user message content via user-provided prompt_builder + try: + user_content = self.prompt_builder(observation, step + 1, history) + except Exception as e: + logger.error(f"prompt_builder failed: {e}", exc_info=True) + user_content = str(observation) + try: + print(f"[OpenEnvRolloutProcessor] Step {step + 1}: built user prompt (len={len(str(user_content))})") + except Exception: + pass + + messages.append(Message(role="user", content=user_content)) + # Optional tracing + if getattr(config, "logger", None): + try: + # Log a snapshot with current messages so UI shows incremental turns + try: + row_for_log = row.model_copy(deep=True) # pydantic v2 + except Exception: + import copy as _copy + row_for_log = _copy.deepcopy(row) + row_for_log.messages = list(messages) + config.logger.log(row_for_log) + except Exception: + pass + + # Call model to generate action (LiteLLM handles multimodal!) 
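+                    # The policy receives plain chat-format dicts (role/content) and is expected to
+                    # return an OpenAI-style response dict with "choices" and "usage", which is how
+                    # the usage accounting and message extraction below consume it.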
+ try: + print(f"[OpenEnvRolloutProcessor] Calling model (messages={len(messages)}) ...") + except Exception: + pass + response = await policy._make_llm_call( + messages=[msg.model_dump() for msg in messages], + tools=None, # No tools - just text generation + ) + + # Update usage + usage["prompt_tokens"] += response["usage"]["prompt_tokens"] + usage["completion_tokens"] += response["usage"]["completion_tokens"] + usage["total_tokens"] += response["usage"]["total_tokens"] + + # Extract assistant message and parse into Action object + assistant_message = response["choices"][0]["message"]["content"] + try: + preview = assistant_message if isinstance(assistant_message, str) else str(assistant_message) + print(f"[OpenEnvRolloutProcessor] Model output (first 120): '{preview[:120] if preview else ''}'") + except Exception: + pass + action = self.action_parser(assistant_message) + try: + label = getattr(action, "action_str", None) + print(f"[OpenEnvRolloutProcessor] Parsed action='{(label or str(action))[:120]}'") + except Exception: + pass + + # Add assistant message (original content) + messages.append(Message(role="assistant", content=assistant_message)) + + # Execute action in environment (OpenEnv standard interface!) with transient-error retries + step_attempts = 2 + step_delay = 0.5 + for si in range(step_attempts): + try: + result = env.step(action) + break + except Exception as se: + if si == step_attempts - 1: + raise + time.sleep(step_delay) + + # Collect reward (OpenEnv standard: result.reward) + reward = float(result.reward or 0.0) + step_rewards.append(reward) + try: + print(f"[OpenEnvRolloutProcessor] Step {step + 1}: reward={reward} done={result.done}") + except Exception: + pass + _action_label = getattr(action, "action_str", None) + if not _action_label: + try: + _action_label = str(action) + except Exception: + _action_label = "" + logger.debug(f"Step {step}: action={_action_label}, reward={reward}") + + # Update observation (OpenEnv standard: result.observation) + observation = result.observation + + # Update history for next prompt + error_flag = getattr(observation, "last_action_error", False) + history_line = f"Step {step + 1}: {_action_label} -> reward {reward:+.2f}{' ERROR' if error_flag else ''}" + history.append(history_line) + # Optional tracing + if getattr(config, "logger", None): + try: + # Log a snapshot with current messages so UI shows incremental turns + try: + row_for_log = row.model_copy(deep=True) # pydantic v2 + except Exception: + import copy as _copy + row_for_log = _copy.deepcopy(row) + row_for_log.messages = list(messages) + config.logger.log(row_for_log) + except Exception: + pass + + # Update row with results + row.messages = messages + row.execution_metadata.usage = CompletionUsage( + prompt_tokens=usage["prompt_tokens"], + completion_tokens=usage["completion_tokens"], + total_tokens=usage["total_tokens"], + ) + row.execution_metadata.duration_seconds = time.perf_counter() - start_time + + # Store rewards for TRL reward functions via a system message sentinel + try: + sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) + messages.append(Message(role="system", content=sentinel)) + print(f"[OpenEnvRolloutProcessor] Total reward={sum(step_rewards):.2f} steps={len(step_rewards)}") + except Exception: + pass + + logger.info( + f"Rollout complete: {len(step_rewards)} steps, " + f"total_reward={sum(step_rewards):.2f}, " + f"duration={row.execution_metadata.duration_seconds:.2f}s" + ) + # Final log with complete message history + if getattr(config, 
"logger", None): + try: + config.logger.log(row) + except Exception: + pass + + return row + + except Exception as e: + logger.error(f"Error in rollout: {e}", exc_info=True) + try: + print(f"[OpenEnvRolloutProcessor][ERROR] {type(e).__name__}: {e}") + except Exception: + pass + raise + finally: + # Cleanup environment + try: + print("[OpenEnvRolloutProcessor] Closing environment client ...") + env.close() + print("[OpenEnvRolloutProcessor] Environment closed.") + except: + pass + + async def _sem_wrapper(r: EvaluationRow) -> EvaluationRow: + async with semaphore: + return await process_row(r) + + # Create and return tasks + tasks = [asyncio.create_task(_sem_wrapper(row)) for row in rows] + return tasks + + def _build_prompt(self, observation_text: str, step: int) -> str: + """ + Build prompt for LLM from observation text. + + Generic prompt that works for any environment. + """ + return ( + f"Step {step + 1}\n\n" + f"Observation:\n{observation_text}\n\n" + f"What action should be taken? Respond with a single action." + ) + + # Removed _extract_action_text: action parsing handled entirely by action_parser + + def _build_env_factory(self) -> Callable[[], Any]: + """ + Create or return an environment factory based on the provided parameters. + Preference order: + 1) Use provided env_factory + 2) Use generic env_client_cls + """ + if self._provided_env_factory is not None: + return self._provided_env_factory + + # If a generic client class is provided, use it + if self._env_client_cls is not None: + def _generic_factory(): + if self._env_base_url: + try: + print(f"[OpenEnvRolloutProcessor] Using env_client_cls base_url={self._env_base_url}") + except Exception: + pass + return self._env_client_cls( # type: ignore[call-arg] + base_url=self._env_base_url, + request_timeout_s=self._request_timeout_s, + default_headers=self._default_headers, + ) + docker_kwargs: Dict[str, Any] = {} + if self._env_vars: + docker_kwargs["env_vars"] = {k: str(v) for k, v in self._env_vars.items()} + if self._docker_port is not None: + docker_kwargs["port"] = int(self._docker_port) + if self._hub_repo_id: + try: + print(f"[OpenEnvRolloutProcessor] Launching from_hub repo_id='{self._hub_repo_id}' ...") + except Exception: + pass + return self._env_client_cls.from_hub( # type: ignore[attr-defined] + self._hub_repo_id, + provider=self._provider, + **docker_kwargs, + ) + else: + try: + print(f"[OpenEnvRolloutProcessor] Launching from_docker_image image='{self._docker_image}' ...") + except Exception: + pass + return self._env_client_cls.from_docker_image( # type: ignore[attr-defined] + self._docker_image, + provider=self._provider, + **docker_kwargs, + ) + return _generic_factory + + # No fallback: require an env_factory or env_client_cls + raise RuntimeError( + "OpenEnvRolloutProcessor requires either env_factory or env_client_cls. " + "Provide one of these to construct the environment." 
+ ) From 7e71e03968ac9aff2416bc5e77d7772ccedf5af6 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Tue, 18 Nov 2025 08:16:09 +0000 Subject: [PATCH 03/10] trl integration --- .../pytest/integrations/openenv_trl_vllm.py | 308 ++++++++++++++++++ .../pytest/openenv_rollout_processor.py | 191 +++++++---- 2 files changed, 428 insertions(+), 71 deletions(-) create mode 100644 eval_protocol/pytest/integrations/openenv_trl_vllm.py diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py new file mode 100644 index 00000000..5b12964a --- /dev/null +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -0,0 +1,308 @@ +""" +Lightweight vLLM + OpenEnv Integration + +Minimal integration to use TRL's vLLM server for inference with OpenEnv BrowserGym +environments, wired into GRPO via a custom ``rollout_func``. + +- Uses TRL's ``VLLMClient`` (``use_vllm=True, vllm_mode="server"``) for inference +- Uses ``OpenEnvRolloutProcessor`` to drive OpenEnv (BrowserGym-style) environments +- Supports task rotation across MiniWoB tasks +- Returns Wordle-style GRPO data: 2D token lists and 1D per-episode rewards +- No Fireworks, no hot reload, no additional providers +""" + +from __future__ import annotations + +import asyncio +import sys +from typing import Any, Callable, Dict, List, Optional, Type + +from eval_protocol.models import EvaluationRow, InputMetadata, Message +from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig + + +def create_openenv_vllm_rollout_func( + env_factory: Callable[[], Any] | None, + prompt_builder: Callable[[Any, int, list[str]], Any], + action_parser: Callable[[str], Any], + vllm_base_url: str = "http://localhost:8000", + vllm_model: str = "Qwen/Qwen2.5-7B", + max_steps: int = 8, + *, + completion_params: Dict[str, Any] | None = None, + concurrency: int | None = None, + processor_cls: Optional[Type[Any]] = OpenEnvRolloutProcessor, + processor_kwargs: Optional[Dict[str, Any]] = None, + # Environment configuration + env_client_cls: Optional[Type[Any]] = None, + tasks: List[str] | None = None, + miniwob_url: str | None = None, + docker_image: str = "browsergym-env:latest", + env_base_url: Optional[str] = None, + request_timeout_s: float = 15.0, + default_headers: Optional[Dict[str, str]] = None, + provider: Any | None = None, + docker_port: Optional[int] = None, + env_vars: Optional[Dict[str, str]] = None, + benchmark: str = "miniwob", + headless: bool = True, + viewport_width: int = 1280, + viewport_height: int = 720, + timeout_ms: int = 10000, +): + """ + Build a TRL-compatible ``rollout_func`` using vLLM inference with OpenEnv. + + High-level: + - ``GRPOTrainer`` calls the returned ``rollout_func(prompts, trainer)`` + - For each prompt, we create ``num_generations`` evaluation rows + - ``OpenEnvRolloutProcessor`` runs BrowserGym-style episodes via Docker + - ``VLLMPolicy`` formats messages with the chat template and calls TRL's + vLLM server using ``trainer.vllm_client`` + - We accumulate tokens across all turns of an episode and sum rewards, + returning Wordle-style GRPO data. + + The environment side is configured via ``env_client_cls`` and the BrowserGym + parameters (``tasks``, ``miniwob_url``, ``docker_image``, etc.). 
+ """ + print(f"\n{'='*80}", flush=True) + print(f"[openenv_trl_vllm] create_openenv_vllm_rollout_func() CALLED", flush=True) + print(f" vllm_base_url: {vllm_base_url}", flush=True) + print(f" vllm_model: {vllm_model}", flush=True) + print(f" tasks: {tasks}", flush=True) + print(f" max_steps: {max_steps}", flush=True) + print(f"{'='*80}", flush=True) + sys.stdout.flush() + + # Import VLLMPolicy + from eval_protocol.mcp.execution.vllm_policy import VLLMPolicy + + # Global-ish task rotation offset across rollout_func calls. + # This lets us rotate tasks between GRPO steps instead of always + # starting from tasks[0] when a new OpenEnvRolloutProcessor is created. + task_cycle_index: int = 0 + + def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: + """Execute rollouts via OpenEnv + vLLM and return GRPO-compatible results.""" + print("\n[OpenEnvVLLM] rollout_func called", flush=True) + + # Extract args from trainer + args = trainer.args + processing_class = trainer.processing_class + + num_generations = getattr(args, "num_generations", 8) + print( + f"[OpenEnvVLLM] Received {len(prompts)} prompts, " + f"{num_generations} generations each", + flush=True, + ) + + # 1) Build evaluation rows + evaluation_rows: List[EvaluationRow] = [] + for prompt in prompts: + for gen_idx in range(num_generations): + evaluation_rows.append( + EvaluationRow( + messages=[Message(role="user", content=prompt)], + input_metadata=InputMetadata( + completion_params={}, + extra={"generation_idx": gen_idx} + ), + ) + ) + + # 2) Build processor config with VLLMPolicy + # We'll pass trainer.vllm_client to VLLMPolicy + base_params: Dict[str, Any] = { + "model": "dummy", # Not used by VLLMPolicy, but needed for config + "temperature": getattr(args, "temperature", 1.0), + "max_tokens": getattr(args, "max_completion_length", 100), + } + if completion_params: + base_params.update(completion_params) + + print( + f"[OpenEnvVLLM] Temperature={base_params['temperature']}, " + f"max_tokens={base_params['max_tokens']}", + flush=True, + ) + print("[OpenEnvVLLM] Using TRL VLLMClient from trainer", flush=True) + + max_concurrency = concurrency if concurrency is not None else getattr( + args, "per_device_train_batch_size", 1 + ) + print( + f"[OpenEnvVLLM] Max concurrency={max_concurrency}, " + f"max_steps={max_steps}", + flush=True, + ) + + config = RolloutProcessorConfig( + completion_params=base_params, + mcp_config_path="", + semaphore=asyncio.Semaphore(max_concurrency), + steps=max_steps, + ) + + # 3) Execute rollouts with VLLMPolicy + print( + f"[OpenEnvVLLM] Instantiating processor: " + f"{processor_cls.__name__ if processor_cls else 'OpenEnvRolloutProcessor'}", + flush=True, + ) + + # Create policy factory that uses trainer's vllm_client + def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs): + """Factory that creates VLLMPolicy using trainer's vllm_client.""" + return VLLMPolicy( + vllm_client=trainer.vllm_client, # Use trainer's vLLM client! + tokenizer=processing_class, # Pass tokenizer for decoding + temperature=temperature, + max_tokens=max_tokens, + top_p=kwargs.get("top_p"), + top_k=kwargs.get("top_k"), + **kwargs, + ) + + Processor = processor_cls or OpenEnvRolloutProcessor + _kwargs: Dict[str, Any] = dict(processor_kwargs or {}) + _kwargs.setdefault("env_factory", env_factory) + _kwargs.setdefault("prompt_builder", prompt_builder) + _kwargs.setdefault("action_parser", action_parser) + _kwargs.setdefault("policy_factory", vllm_policy_factory) # Pass VLLMPolicy factory! 
+ _kwargs.setdefault("env_client_cls", env_client_cls) + + # Rotate tasks across rollout_func calls so each GRPO step + # primarily targets a different task, while keeping all + # generations within a step on the same task. + rotated_tasks = tasks + if tasks: + nonlocal task_cycle_index + offset = task_cycle_index % len(tasks) + rotated_tasks = tasks[offset:] + tasks[:offset] + task_cycle_index = (task_cycle_index + 1) % len(tasks) + print( + f"[OpenEnvVLLM] Task rotation offset={offset}, rotated={rotated_tasks}", + flush=True, + ) + _kwargs.setdefault("tasks", rotated_tasks) + + _kwargs.setdefault("miniwob_url", miniwob_url) + _kwargs.setdefault("docker_image", docker_image) + _kwargs.setdefault("env_base_url", env_base_url) + _kwargs.setdefault("request_timeout_s", request_timeout_s) + _kwargs.setdefault("default_headers", default_headers) + _kwargs.setdefault("provider", provider) + _kwargs.setdefault("docker_port", docker_port) + _kwargs.setdefault("env_vars", env_vars) + _kwargs.setdefault("benchmark", benchmark) + _kwargs.setdefault("headless", headless) + _kwargs.setdefault("viewport_width", viewport_width) + _kwargs.setdefault("viewport_height", viewport_height) + _kwargs.setdefault("timeout_ms", timeout_ms) + _kwargs.setdefault("num_generations", num_generations) + + processor = Processor(**_kwargs) + print(f"[OpenEnvVLLM] Processor instantiated successfully", flush=True) + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + async def _run_all(): + tasks_list = processor(evaluation_rows, config) + return await asyncio.gather(*tasks_list) + + completed_rows = loop.run_until_complete(_run_all()) + print( + f"[OpenEnvVLLM] All rollouts completed: {len(completed_rows)} results", + flush=True, + ) + finally: + loop.close() + + # 4) Convert to Wordle-style format (no splitting) + # Each completed_row is one rollout with multiple turns + # We .extend() tokens across turns, then .append() per rollout + print( + f"[OpenEnvVLLM] Converting {len(completed_rows)} rollouts to TRL format", + flush=True, + ) + + tokenizer = getattr(processing_class, "tokenizer", None) or processing_class + encode_fn = getattr(tokenizer, "encode", None) + + episode_prompt_ids: List[List[int]] = [] + episode_completion_ids: List[List[int]] = [] + episode_logprobs: List[List[float]] = [] + step_rewards_all: List[List[float]] = [] + + for idx, row in enumerate(completed_rows): + # Accumulate tokens across all turns in this rollout + prompt_ids: List[int] = [] # .extend() for each turn + completion_ids: List[int] = [] # .extend() for each turn + logprobs: List[float] = [] # .extend() for each turn + rewards: List[float] = [] + + # Go through all messages and accumulate tokens + for msg in row.messages: + if msg.role == "user": + tokens = encode_fn(msg.content or "") if encode_fn else [] + prompt_ids.extend(tokens) # Accumulate user tokens + elif msg.role == "assistant": + tokens = encode_fn(msg.content or "") if encode_fn else [] + completion_ids.extend(tokens) # Accumulate assistant tokens + logprobs.extend([0.0] * len(tokens)) # Placeholder logprobs + elif msg.role == "system": + # Extract step rewards + try: + content = msg.content or "" + if isinstance(content, str) and content.startswith("__ep_step_rewards__:"): + import json + payload = content.split(":", 1)[1] + rewards = json.loads(payload) or [] + except Exception: + pass + + # Fallback for rewards + if not rewards and hasattr(row.execution_metadata, "extra"): + try: + rewards = row.execution_metadata.extra.get("step_rewards", []) or [] 
+ except Exception: + pass + + # Append accumulated tokens for this episode + episode_prompt_ids.append(prompt_ids if prompt_ids else [0]) + episode_completion_ids.append(completion_ids if completion_ids else [0]) + episode_logprobs.append(logprobs if logprobs else [0.0]) + step_rewards_all.append(rewards if rewards else [0.0]) + + total_reward = sum(sum(r) for r in step_rewards_all) + avg_reward = total_reward / len(step_rewards_all) if step_rewards_all else 0.0 + print( + f"[OpenEnvVLLM] Total reward={total_reward:.2f}, Avg reward={avg_reward:.2f}", + flush=True, + ) + print( + f"[OpenEnvVLLM] Returning {len(episode_prompt_ids)} episodes", flush=True + ) + sys.stdout.flush() + + # Return in Wordle format + # Tokens: 2D arrays (accumulate across turns, one list per episode) + # Rewards: 1D arrays (one scalar per episode) + total_rewards = [sum(r) for r in step_rewards_all] # Sum step rewards per episode + + print(f"[OpenEnvVLLM] Episode rewards: {total_rewards}", flush=True) + + return { + "prompt_ids": episode_prompt_ids, # List[List[int]] - tokens per episode + "completion_ids": episode_completion_ids, # List[List[int]] - tokens per episode + "logprobs": episode_logprobs, # List[List[float]] - logprobs per episode + "step_rewards": total_rewards, # List[float] - total reward per episode (1D!) + } + + print(f"[openenv_trl_vllm] Returning rollout_func (type={type(rollout_func)})", flush=True) + sys.stdout.flush() + return rollout_func + diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py index fc9c14dd..f214c044 100644 --- a/eval_protocol/pytest/openenv_rollout_processor.py +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -25,7 +25,7 @@ from eval_protocol.pytest.rollout_processor import RolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig - logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) class OpenEnvRolloutProcessor(RolloutProcessor): @@ -73,6 +73,8 @@ def __init__( prompt_builder: Callable[[Any, int, List[str]], Any] | None = None, action_parser: Callable[[str], Any] | None = None, *, + # Policy parameter - NEW! 
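+        # When provided, policy_factory(model=..., temperature=..., max_tokens=..., base_url=..., **extra)
+        # is called per rollout instead of constructing the default LiteLLMPolicy.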
+ policy_factory: Optional[Callable] = None, # Factory to create policy from config # Environment construction parameters (generic HTTP client or Docker) env_client_cls: Optional[Type[Any]] = None, tasks: Optional[List[str]] = None, @@ -117,6 +119,7 @@ def __init__( if action_parser is None: raise ValueError("action_parser must be provided and return an Action object.") self.action_parser = action_parser + self.policy_factory = policy_factory # Store policy factory # Store env construction parameters self._provided_env_factory = env_factory @@ -150,20 +153,20 @@ def __call__( semaphore = config.semaphore max_steps = config.steps or 8 + print(f"\n[OpenEnvRolloutProcessor] __call__ invoked with {len(rows)} rows", flush=True) + print(f"[OpenEnvRolloutProcessor] Max steps: {max_steps}", flush=True) + print(f"[OpenEnvRolloutProcessor] Semaphore limit: {semaphore._value if hasattr(semaphore, '_value') else 'unknown'}", flush=True) + async def process_row(row: EvaluationRow) -> EvaluationRow: """Process a single row with OpenEnv rollout.""" start_time = time.perf_counter() + print(f"\n[OpenEnvRolloutProcessor] Starting rollout for row...", flush=True) + # Create environment - try: - print("[OpenEnvRolloutProcessor] Creating environment via env_factory() ...") - except Exception: - pass + print("[OpenEnvRolloutProcessor] Creating environment via env_factory()...", flush=True) env = self.env_factory() - try: - print("[OpenEnvRolloutProcessor] Environment client created.") - except Exception: - pass + print("[OpenEnvRolloutProcessor] Environment client created successfully.", flush=True) try: # Get model config @@ -180,35 +183,39 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: extra_params.pop(_k, None) except Exception: pass - try: - print(f"[OpenEnvRolloutProcessor] Model='{model}' temp={temperature} max_tokens={max_tokens} base_url={base_url or '(default)'}") - except Exception: - pass + print(f"[OpenEnvRolloutProcessor] Model='{model}' temp={temperature} max_tokens={max_tokens} base_url={base_url or '(default)'}", flush=True) # Create policy for generation - policy = LiteLLMPolicy( - model_id=model, - temperature=temperature, - max_tokens=max_tokens, - base_url=base_url, - **extra_params, - ) + if self.policy_factory is not None: + print(f"[OpenEnvRolloutProcessor] Creating policy using custom factory...", flush=True) + policy = self.policy_factory( + model=model, + temperature=temperature, + max_tokens=max_tokens, + base_url=base_url, + **extra_params, + ) + print(f"[OpenEnvRolloutProcessor] Custom policy created successfully", flush=True) + else: + print(f"[OpenEnvRolloutProcessor] Creating LiteLLMPolicy (default)...", flush=True) + policy = LiteLLMPolicy( + model_id=model, + temperature=temperature, + max_tokens=max_tokens, + base_url=base_url, + **extra_params, + ) + print(f"[OpenEnvRolloutProcessor] LiteLLMPolicy created successfully", flush=True) # Reset environment with simple transient-error retries reset_attempts = 3 reset_delay = 1.0 last_exc = None - try: - print("[OpenEnvRolloutProcessor] Resetting environment ...") - except Exception: - pass + print("[OpenEnvRolloutProcessor] Resetting environment...", flush=True) for i in range(reset_attempts): try: result = env.reset() - try: - print(f"[OpenEnvRolloutProcessor] reset() succeeded on attempt {i + 1}") - except Exception: - pass + print(f"[OpenEnvRolloutProcessor] reset() succeeded on attempt {i + 1}", flush=True) break except Exception as e: last_exc = e @@ -217,6 +224,7 @@ async def process_row(row: EvaluationRow) -> 
EvaluationRow: time.sleep(reset_delay) reset_delay *= 2.0 observation = result.observation + print(f"[OpenEnvRolloutProcessor] Initial observation received", flush=True) # Initialize tracking @@ -241,26 +249,25 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: step_rewards = [] history: List[str] = [] + print(f"[OpenEnvRolloutProcessor] Starting agent loop (max {max_steps} steps)", flush=True) + # Agent loop: model → action → env.step → repeat for step in range(max_steps): + print(f"\n[OpenEnvRolloutProcessor] === STEP {step + 1}/{max_steps} ===", flush=True) + if result.done: logger.info(f"Episode done after {step} steps") - try: - print(f"[OpenEnvRolloutProcessor] Episode already done at step {step}") - except Exception: - pass + print(f"[OpenEnvRolloutProcessor] Episode already done at step {step}", flush=True) break # Build user message content via user-provided prompt_builder try: + print(f"[OpenEnvRolloutProcessor] Building prompt...", flush=True) user_content = self.prompt_builder(observation, step + 1, history) + print(f"[OpenEnvRolloutProcessor] Prompt built (len={len(str(user_content))})", flush=True) except Exception as e: logger.error(f"prompt_builder failed: {e}", exc_info=True) user_content = str(observation) - try: - print(f"[OpenEnvRolloutProcessor] Step {step + 1}: built user prompt (len={len(str(user_content))})") - except Exception: - pass messages.append(Message(role="user", content=user_content)) # Optional tracing @@ -278,56 +285,52 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: pass # Call model to generate action (LiteLLM handles multimodal!) - try: - print(f"[OpenEnvRolloutProcessor] Calling model (messages={len(messages)}) ...") - except Exception: - pass + print(f"[OpenEnvRolloutProcessor] Calling LLM (messages={len(messages)})...", flush=True) response = await policy._make_llm_call( messages=[msg.model_dump() for msg in messages], tools=None, # No tools - just text generation ) + print(f"[OpenEnvRolloutProcessor] LLM call completed", flush=True) # Update usage usage["prompt_tokens"] += response["usage"]["prompt_tokens"] usage["completion_tokens"] += response["usage"]["completion_tokens"] usage["total_tokens"] += response["usage"]["total_tokens"] + print(f"[OpenEnvRolloutProcessor] Tokens: prompt={response['usage']['prompt_tokens']}, completion={response['usage']['completion_tokens']}", flush=True) # Extract assistant message and parse into Action object assistant_message = response["choices"][0]["message"]["content"] - try: - preview = assistant_message if isinstance(assistant_message, str) else str(assistant_message) - print(f"[OpenEnvRolloutProcessor] Model output (first 120): '{preview[:120] if preview else ''}'") - except Exception: - pass + preview = assistant_message if isinstance(assistant_message, str) else str(assistant_message) + print(f"[OpenEnvRolloutProcessor] Model output: '{preview[:120] if preview else ''}'", flush=True) + + print(f"[OpenEnvRolloutProcessor] Parsing action...", flush=True) action = self.action_parser(assistant_message) - try: - label = getattr(action, "action_str", None) - print(f"[OpenEnvRolloutProcessor] Parsed action='{(label or str(action))[:120]}'") - except Exception: - pass + label = getattr(action, "action_str", None) or str(action) + print(f"[OpenEnvRolloutProcessor] Parsed action: '{label[:120]}'", flush=True) # Add assistant message (original content) messages.append(Message(role="assistant", content=assistant_message)) # Execute action in environment (OpenEnv standard interface!) 
with transient-error retries + print(f"[OpenEnvRolloutProcessor] Executing action in environment...", flush=True) step_attempts = 2 step_delay = 0.5 for si in range(step_attempts): try: result = env.step(action) + print(f"[OpenEnvRolloutProcessor] env.step() succeeded", flush=True) break except Exception as se: if si == step_attempts - 1: + print(f"[OpenEnvRolloutProcessor] env.step() failed after {step_attempts} attempts: {se}", flush=True) raise time.sleep(step_delay) # Collect reward (OpenEnv standard: result.reward) reward = float(result.reward or 0.0) step_rewards.append(reward) - try: - print(f"[OpenEnvRolloutProcessor] Step {step + 1}: reward={reward} done={result.done}") - except Exception: - pass + print(f"[OpenEnvRolloutProcessor] Step {step + 1}: reward={reward:.3f}, done={result.done}", flush=True) + _action_label = getattr(action, "action_str", None) if not _action_label: try: @@ -367,16 +370,19 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: row.execution_metadata.duration_seconds = time.perf_counter() - start_time # Store rewards for TRL reward functions via a system message sentinel - try: - sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) - messages.append(Message(role="system", content=sentinel)) - print(f"[OpenEnvRolloutProcessor] Total reward={sum(step_rewards):.2f} steps={len(step_rewards)}") - except Exception: - pass + sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) + messages.append(Message(role="system", content=sentinel)) + + total_reward = sum(step_rewards) + print(f"\n[OpenEnvRolloutProcessor] ✅ ROLLOUT COMPLETE", flush=True) + print(f"[OpenEnvRolloutProcessor] Steps: {len(step_rewards)}", flush=True) + print(f"[OpenEnvRolloutProcessor] Total reward: {total_reward:.3f}", flush=True) + print(f"[OpenEnvRolloutProcessor] Duration: {row.execution_metadata.duration_seconds:.2f}s", flush=True) + print(f"[OpenEnvRolloutProcessor] Messages collected: {len(messages)}", flush=True) logger.info( f"Rollout complete: {len(step_rewards)} steps, " - f"total_reward={sum(step_rewards):.2f}, " + f"total_reward={total_reward:.2f}, " f"duration={row.execution_metadata.duration_seconds:.2f}s" ) # Final log with complete message history @@ -390,26 +396,25 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: except Exception as e: logger.error(f"Error in rollout: {e}", exc_info=True) - try: - print(f"[OpenEnvRolloutProcessor][ERROR] {type(e).__name__}: {e}") - except Exception: - pass + print(f"\n[OpenEnvRolloutProcessor] ❌ ERROR in rollout: {type(e).__name__}: {e}", flush=True) raise finally: # Cleanup environment + print("[OpenEnvRolloutProcessor] Closing environment client...", flush=True) try: - print("[OpenEnvRolloutProcessor] Closing environment client ...") env.close() - print("[OpenEnvRolloutProcessor] Environment closed.") - except: - pass + print("[OpenEnvRolloutProcessor] Environment closed successfully.", flush=True) + except Exception as close_err: + print(f"[OpenEnvRolloutProcessor] Error closing environment: {close_err}", flush=True) async def _sem_wrapper(r: EvaluationRow) -> EvaluationRow: async with semaphore: return await process_row(r) # Create and return tasks + print(f"[OpenEnvRolloutProcessor] Creating {len(rows)} async tasks...", flush=True) tasks = [asyncio.create_task(_sem_wrapper(row)) for row in rows] + print(f"[OpenEnvRolloutProcessor] Returning {len(tasks)} tasks", flush=True) return tasks def _build_prompt(self, observation_text: str, step: int) -> str: @@ -431,7 +436,7 @@ def _build_env_factory(self) -> 
Callable[[], Any]: Create or return an environment factory based on the provided parameters. Preference order: 1) Use provided env_factory - 2) Use generic env_client_cls + 2) Use generic env_client_cls with task-aware env vars (BrowserGym-style) """ if self._provided_env_factory is not None: return self._provided_env_factory @@ -449,9 +454,53 @@ def _generic_factory(): request_timeout_s=self._request_timeout_s, default_headers=self._default_headers, ) + + # ------------------------------ + # Docker-based env: build env_vars with task rotation + # ------------------------------ docker_kwargs: Dict[str, Any] = {} + + # 1) Build default BrowserGym-style env vars + env_vars_default: Dict[str, str] = { + "BROWSERGYM_BENCHMARK": str(self._benchmark), + "BROWSERGYM_HEADLESS": "true" if self._headless else "false", + "BROWSERGYM_VIEWPORT_WIDTH": str(self._viewport_width), + "BROWSERGYM_VIEWPORT_HEIGHT": str(self._viewport_height), + "BROWSERGYM_TIMEOUT": str(int(self._timeout_ms)), + # Keep obs/info flags consistent for BrowserGym + "BROWSERGYM_OBS_AXTREE": "1", + "BROWSERGYM_OBS_PRUNED_HTML": "1", + "BROWSERGYM_RETURN_INFO": "1", + } + + # 2) Select task for this env instance (if provided), grouped by num_generations + selected_task: Optional[str] = None + if self._tasks: + idx = self._env_create_idx + self._env_create_idx = idx + 1 + group = idx // max(1, self._num_generations) + selected_task = self._tasks[group % len(self._tasks)] + env_vars_default["BROWSERGYM_TASK_NAME"] = str(selected_task) + try: + print( + "[OpenEnvRolloutProcessor] Task selection:" + f" idx={idx}, group={group}, num_generations={self._num_generations}," + f" selected_task={selected_task}, tasks={self._tasks}", + flush=True, + ) + except Exception: + pass + + # 3) MiniWoB URL (if provided) + if self._miniwob_url: + env_vars_default["MINIWOB_URL"] = str(self._miniwob_url) + + # 4) Merge user-provided env vars (override defaults) if self._env_vars: - docker_kwargs["env_vars"] = {k: str(v) for k, v in self._env_vars.items()} + env_vars_default.update({k: str(v) for k, v in self._env_vars.items()}) + + docker_kwargs["env_vars"] = env_vars_default + if self._docker_port is not None: docker_kwargs["port"] = int(self._docker_port) if self._hub_repo_id: From 8638c2a872d0bb3a896876a54ff840611a465e35 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Tue, 18 Nov 2025 17:16:54 -0800 Subject: [PATCH 04/10] comments --- .../pytest/integrations/openenv_trl_vllm.py | 105 ++- .../pytest/openenv_rollout_processor.py | 319 +++++---- pyproject.toml | 5 + tests/pytest/test_openenv_browsergym_basic.py | 20 +- tests/pytest/test_openenv_browsergym_eval.py | 48 +- uv.lock | 652 +++++++++++++----- 6 files changed, 760 insertions(+), 389 deletions(-) diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py index 5b12964a..58d1fbd8 100644 --- a/eval_protocol/pytest/integrations/openenv_trl_vllm.py +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -37,6 +37,7 @@ def create_openenv_vllm_rollout_func( # Environment configuration env_client_cls: Optional[Type[Any]] = None, tasks: List[str] | None = None, + task_var: Optional[str] = None, miniwob_url: str | None = None, docker_image: str = "browsergym-env:latest", env_base_url: Optional[str] = None, @@ -66,15 +67,15 @@ def create_openenv_vllm_rollout_func( The environment side is configured via ``env_client_cls`` and the BrowserGym parameters (``tasks``, ``miniwob_url``, ``docker_image``, etc.). 
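    Illustrative usage (a sketch, not part of this patch; it assumes ``BrowserGymEnv`` is
    importable from ``envs.browsergym_env``, a local vLLM server, and a TRL trainer instance
    named ``trainer`` in scope — parameter names mirror those documented above):

    ```python
    # Sketch only: the exact trainer hookup depends on the TRL version in use.
    from envs.browsergym_env import BrowserGymEnv  # assumed importable

    rollout_func = create_openenv_vllm_rollout_func(
        vllm_base_url="http://localhost:8000",   # assumed local vLLM endpoint
        vllm_model="my-model",                    # placeholder model id
        env_client_cls=BrowserGymEnv,
        tasks=["click-test"],
        task_var="BROWSERGYM_TASK_NAME",
        miniwob_url="http://172.17.0.1:8888/miniwob/",
        docker_image="browsergym-env:latest",
        max_steps=4,
    )

    # Inside training: returns per-episode prompt_ids, completion_ids,
    # logprobs, and step_rewards lists in GRPO-compatible form.
    results = rollout_func(prompts, trainer)
    ```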
""" - print(f"\n{'='*80}", flush=True) - print(f"[openenv_trl_vllm] create_openenv_vllm_rollout_func() CALLED", flush=True) + print(f"\n{'=' * 80}", flush=True) + print("[openenv_trl_vllm] create_openenv_vllm_rollout_func() CALLED", flush=True) print(f" vllm_base_url: {vllm_base_url}", flush=True) print(f" vllm_model: {vllm_model}", flush=True) print(f" tasks: {tasks}", flush=True) print(f" max_steps: {max_steps}", flush=True) - print(f"{'='*80}", flush=True) + print(f"{'=' * 80}", flush=True) sys.stdout.flush() - + # Import VLLMPolicy from eval_protocol.mcp.execution.vllm_policy import VLLMPolicy @@ -82,36 +83,32 @@ def create_openenv_vllm_rollout_func( # This lets us rotate tasks between GRPO steps instead of always # starting from tasks[0] when a new OpenEnvRolloutProcessor is created. task_cycle_index: int = 0 - + def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: """Execute rollouts via OpenEnv + vLLM and return GRPO-compatible results.""" print("\n[OpenEnvVLLM] rollout_func called", flush=True) - + # Extract args from trainer args = trainer.args processing_class = trainer.processing_class - + num_generations = getattr(args, "num_generations", 8) print( - f"[OpenEnvVLLM] Received {len(prompts)} prompts, " - f"{num_generations} generations each", + f"[OpenEnvVLLM] Received {len(prompts)} prompts, {num_generations} generations each", flush=True, ) - + # 1) Build evaluation rows evaluation_rows: List[EvaluationRow] = [] for prompt in prompts: for gen_idx in range(num_generations): - evaluation_rows.append( - EvaluationRow( - messages=[Message(role="user", content=prompt)], - input_metadata=InputMetadata( - completion_params={}, - extra={"generation_idx": gen_idx} - ), - ) + row = EvaluationRow( + messages=[Message(role="user", content=prompt)], + input_metadata=InputMetadata(completion_params={}), ) - + row.input_metadata.generation_idx = gen_idx # type: ignore[attr-defined] + evaluation_rows.append(row) + # 2) Build processor config with VLLMPolicy # We'll pass trainer.vllm_client to VLLMPolicy base_params: Dict[str, Any] = { @@ -121,37 +118,33 @@ def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: } if completion_params: base_params.update(completion_params) - + print( - f"[OpenEnvVLLM] Temperature={base_params['temperature']}, " - f"max_tokens={base_params['max_tokens']}", + f"[OpenEnvVLLM] Temperature={base_params['temperature']}, max_tokens={base_params['max_tokens']}", flush=True, ) print("[OpenEnvVLLM] Using TRL VLLMClient from trainer", flush=True) - - max_concurrency = concurrency if concurrency is not None else getattr( - args, "per_device_train_batch_size", 1 - ) + + max_concurrency = concurrency if concurrency is not None else getattr(args, "per_device_train_batch_size", 1) print( - f"[OpenEnvVLLM] Max concurrency={max_concurrency}, " - f"max_steps={max_steps}", + f"[OpenEnvVLLM] Max concurrency={max_concurrency}, max_steps={max_steps}", flush=True, ) - + config = RolloutProcessorConfig( completion_params=base_params, mcp_config_path="", semaphore=asyncio.Semaphore(max_concurrency), steps=max_steps, ) - + # 3) Execute rollouts with VLLMPolicy print( f"[OpenEnvVLLM] Instantiating processor: " f"{processor_cls.__name__ if processor_cls else 'OpenEnvRolloutProcessor'}", flush=True, ) - + # Create policy factory that uses trainer's vllm_client def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs): """Factory that creates VLLMPolicy using trainer's vllm_client.""" @@ -164,7 +157,7 @@ def vllm_policy_factory(model, 
temperature, max_tokens, base_url=None, **kwargs) top_k=kwargs.get("top_k"), **kwargs, ) - + Processor = processor_cls or OpenEnvRolloutProcessor _kwargs: Dict[str, Any] = dict(processor_kwargs or {}) _kwargs.setdefault("env_factory", env_factory) @@ -187,6 +180,7 @@ def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs) flush=True, ) _kwargs.setdefault("tasks", rotated_tasks) + _kwargs.setdefault("task_var", task_var) _kwargs.setdefault("miniwob_url", miniwob_url) _kwargs.setdefault("docker_image", docker_image) @@ -202,17 +196,18 @@ def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs) _kwargs.setdefault("viewport_height", viewport_height) _kwargs.setdefault("timeout_ms", timeout_ms) _kwargs.setdefault("num_generations", num_generations) - + processor = Processor(**_kwargs) - print(f"[OpenEnvVLLM] Processor instantiated successfully", flush=True) - + print("[OpenEnvVLLM] Processor instantiated successfully", flush=True) + loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: + async def _run_all(): tasks_list = processor(evaluation_rows, config) return await asyncio.gather(*tasks_list) - + completed_rows = loop.run_until_complete(_run_all()) print( f"[OpenEnvVLLM] All rollouts completed: {len(completed_rows)} results", @@ -220,7 +215,7 @@ async def _run_all(): ) finally: loop.close() - + # 4) Convert to Wordle-style format (no splitting) # Each completed_row is one rollout with multiple turns # We .extend() tokens across turns, then .append() per rollout @@ -228,22 +223,22 @@ async def _run_all(): f"[OpenEnvVLLM] Converting {len(completed_rows)} rollouts to TRL format", flush=True, ) - + tokenizer = getattr(processing_class, "tokenizer", None) or processing_class encode_fn = getattr(tokenizer, "encode", None) - + episode_prompt_ids: List[List[int]] = [] episode_completion_ids: List[List[int]] = [] episode_logprobs: List[List[float]] = [] step_rewards_all: List[List[float]] = [] - + for idx, row in enumerate(completed_rows): # Accumulate tokens across all turns in this rollout prompt_ids: List[int] = [] # .extend() for each turn completion_ids: List[int] = [] # .extend() for each turn logprobs: List[float] = [] # .extend() for each turn rewards: List[float] = [] - + # Go through all messages and accumulate tokens for msg in row.messages: if msg.role == "user": @@ -259,50 +254,50 @@ async def _run_all(): content = msg.content or "" if isinstance(content, str) and content.startswith("__ep_step_rewards__:"): import json + payload = content.split(":", 1)[1] rewards = json.loads(payload) or [] except Exception: pass - - # Fallback for rewards - if not rewards and hasattr(row.execution_metadata, "extra"): + + # Fallback for rewards (if extra field exists via model_config extra="allow") + if not rewards: try: - rewards = row.execution_metadata.extra.get("step_rewards", []) or [] + extra = getattr(row.execution_metadata, "extra", None) + if isinstance(extra, dict): + rewards = extra.get("step_rewards", []) or [] except Exception: pass - + # Append accumulated tokens for this episode episode_prompt_ids.append(prompt_ids if prompt_ids else [0]) episode_completion_ids.append(completion_ids if completion_ids else [0]) episode_logprobs.append(logprobs if logprobs else [0.0]) step_rewards_all.append(rewards if rewards else [0.0]) - + total_reward = sum(sum(r) for r in step_rewards_all) avg_reward = total_reward / len(step_rewards_all) if step_rewards_all else 0.0 print( f"[OpenEnvVLLM] Total reward={total_reward:.2f}, Avg 
reward={avg_reward:.2f}", flush=True, ) - print( - f"[OpenEnvVLLM] Returning {len(episode_prompt_ids)} episodes", flush=True - ) + print(f"[OpenEnvVLLM] Returning {len(episode_prompt_ids)} episodes", flush=True) sys.stdout.flush() - + # Return in Wordle format # Tokens: 2D arrays (accumulate across turns, one list per episode) # Rewards: 1D arrays (one scalar per episode) total_rewards = [sum(r) for r in step_rewards_all] # Sum step rewards per episode - + print(f"[OpenEnvVLLM] Episode rewards: {total_rewards}", flush=True) - + return { "prompt_ids": episode_prompt_ids, # List[List[int]] - tokens per episode "completion_ids": episode_completion_ids, # List[List[int]] - tokens per episode "logprobs": episode_logprobs, # List[List[float]] - logprobs per episode "step_rewards": total_rewards, # List[float] - total reward per episode (1D!) } - + print(f"[openenv_trl_vllm] Returning rollout_func (type={type(rollout_func)})", flush=True) sys.stdout.flush() return rollout_func - diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py index f214c044..c3716c48 100644 --- a/eval_protocol/pytest/openenv_rollout_processor.py +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -24,46 +24,46 @@ from eval_protocol.models import EvaluationRow, Message from eval_protocol.pytest.rollout_processor import RolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig - + logger = logging.getLogger(__name__) class OpenEnvRolloutProcessor(RolloutProcessor): """ Generic rollout processor for ANY OpenEnv environment. - + Works with any environment that follows OpenEnv's standard interface: - HTTPEnvClient[ActionType, ObservationType] - reset() → StepResult[ObservationType] - step(action: ActionType) → StepResult[ObservationType] - state() → State - + No environment-specific code - just uses the standard interface! - + Examples: ```python # BrowserGym from envs.browsergym_env import BrowserGymEnv, BrowserGymAction def make_env(): return BrowserGymEnv.from_docker_image(...) - + # Echo from envs.echo_env import EchoEnv, EchoAction def make_env(): return EchoEnv.from_docker_image(...) - + # TextArena from envs.textarena_env import TextArenaEnv, TextArenaAction def make_env(): return TextArenaEnv.from_docker_image(...) - + # Same processor works for all! processor = OpenEnvRolloutProcessor( env_factory=make_env, action_parser=lambda text: BrowserGymAction(action_str=text), # or EchoAction(message=text), etc. ) ``` - + For TRL integration, see: trl-evalp/openenv_trl_integration.py """ @@ -74,10 +74,11 @@ def __init__( action_parser: Callable[[str], Any] | None = None, *, # Policy parameter - NEW! - policy_factory: Optional[Callable] = None, # Factory to create policy from config + policy_factory: Optional[Callable[..., Any]] = None, # Factory to create policy from config # Environment construction parameters (generic HTTP client or Docker) env_client_cls: Optional[Type[Any]] = None, tasks: Optional[List[str]] = None, + task_var: Optional[str] = None, miniwob_url: Optional[str] = None, docker_image: str = "browsergym-env:latest", env_base_url: Optional[str] = None, @@ -96,7 +97,7 @@ def __init__( ): """ Initialize processor. - + Args: env_factory: Optional callable that creates an OpenEnv environment (HTTPEnvClient) Example: lambda: BrowserGymEnv.from_docker_image(...). 
If not provided, @@ -110,7 +111,7 @@ def __init__( Example: lambda text: BrowserGymAction(action_str=text) Example: lambda text: EchoAction(message=text) env_client_cls: Optional environment HTTP client class (generic). - tasks, miniwob_url, docker_image, env_base_url, request_timeout_s, default_headers, + tasks, task_var, miniwob_url, docker_image, env_base_url, request_timeout_s, default_headers, provider, docker_port, env_vars, benchmark, headless, viewport_*, timeout_ms: Parameters to construct default environments if env_factory is not provided. num_generations: Optional hint for task rotation grouping (used to mimic GRPO grouping). @@ -125,6 +126,7 @@ def __init__( self._provided_env_factory = env_factory self._env_client_cls = env_client_cls self._tasks = tasks or [] + self._task_var = task_var self._miniwob_url = miniwob_url self._docker_image = docker_image self._env_base_url = env_base_url @@ -133,7 +135,7 @@ def __init__( self._default_headers = default_headers self._provider = provider self._docker_port = docker_port - self._env_vars = env_vars or {} + self._env_vars = {k: str(v) for k, v in (env_vars or {}).items()} self._benchmark = benchmark self._headless = headless self._viewport_width = viewport_width @@ -142,32 +144,36 @@ def __init__( self._num_generations = max(1, int(num_generations)) if num_generations else 1 self._env_create_idx: int = 0 + if self._tasks and not self._task_var: + raise ValueError("task_var must be provided when tasks are configured.") + # Build env_factory if not provided self.env_factory = self._build_env_factory() - - def __call__( - self, rows: List[EvaluationRow], config: RolloutProcessorConfig - ) -> List[asyncio.Task[EvaluationRow]]: + + def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> List[asyncio.Task[EvaluationRow]]: """Process evaluation rows and return async tasks.""" - + semaphore = config.semaphore max_steps = config.steps or 8 - - print(f"\n[OpenEnvRolloutProcessor] __call__ invoked with {len(rows)} rows", flush=True) - print(f"[OpenEnvRolloutProcessor] Max steps: {max_steps}", flush=True) - print(f"[OpenEnvRolloutProcessor] Semaphore limit: {semaphore._value if hasattr(semaphore, '_value') else 'unknown'}", flush=True) - + + logger.info("[OpenEnvRolloutProcessor] __call__ invoked with %d rows", len(rows)) + logger.info("[OpenEnvRolloutProcessor] Max steps: %d", max_steps) + logger.debug( + "[OpenEnvRolloutProcessor] Semaphore limit: %s", + getattr(semaphore, "_value", "unknown"), + ) + async def process_row(row: EvaluationRow) -> EvaluationRow: """Process a single row with OpenEnv rollout.""" start_time = time.perf_counter() - - print(f"\n[OpenEnvRolloutProcessor] Starting rollout for row...", flush=True) - + + logger.info("[OpenEnvRolloutProcessor] Starting rollout for row") + # Create environment - print("[OpenEnvRolloutProcessor] Creating environment via env_factory()...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Creating environment via env_factory()") env = self.env_factory() - print("[OpenEnvRolloutProcessor] Environment client created successfully.", flush=True) - + logger.debug("[OpenEnvRolloutProcessor] Environment client created successfully") + try: # Get model config raw_model = config.completion_params.get("model", "gpt-4o-mini") @@ -183,11 +189,17 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: extra_params.pop(_k, None) except Exception: pass - print(f"[OpenEnvRolloutProcessor] Model='{model}' temp={temperature} max_tokens={max_tokens} base_url={base_url or 
'(default)'}", flush=True) - + logger.info( + "[OpenEnvRolloutProcessor] Model='%s' temp=%s max_tokens=%s base_url=%s", + model, + temperature, + max_tokens, + base_url or "(default)", + ) + # Create policy for generation if self.policy_factory is not None: - print(f"[OpenEnvRolloutProcessor] Creating policy using custom factory...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Creating policy using custom factory") policy = self.policy_factory( model=model, temperature=temperature, @@ -195,9 +207,9 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: base_url=base_url, **extra_params, ) - print(f"[OpenEnvRolloutProcessor] Custom policy created successfully", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Custom policy created successfully") else: - print(f"[OpenEnvRolloutProcessor] Creating LiteLLMPolicy (default)...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Creating LiteLLMPolicy (default)") policy = LiteLLMPolicy( model_id=model, temperature=temperature, @@ -205,40 +217,35 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: base_url=base_url, **extra_params, ) - print(f"[OpenEnvRolloutProcessor] LiteLLMPolicy created successfully", flush=True) - + logger.debug("[OpenEnvRolloutProcessor] LiteLLMPolicy created successfully") + # Reset environment with simple transient-error retries reset_attempts = 3 reset_delay = 1.0 - last_exc = None - print("[OpenEnvRolloutProcessor] Resetting environment...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Resetting environment") + result = None for i in range(reset_attempts): try: result = env.reset() - print(f"[OpenEnvRolloutProcessor] reset() succeeded on attempt {i + 1}", flush=True) + logger.debug("[OpenEnvRolloutProcessor] reset() succeeded on attempt %d", i + 1) break except Exception as e: - last_exc = e if i == reset_attempts - 1: raise time.sleep(reset_delay) reset_delay *= 2.0 + + if result is None: + raise RuntimeError("Failed to reset environment after all retry attempts") + observation = result.observation - print(f"[OpenEnvRolloutProcessor] Initial observation received", flush=True) - - + logger.debug("[OpenEnvRolloutProcessor] Initial observation received") + # Initialize tracking messages = list(row.messages) # Copy initial messages # Inject system prompt if provided and not already present - try: - has_system = any(m.role == "system" for m in messages) - except Exception: - has_system = False - system_prompt = None - try: - system_prompt = config.completion_params.get("system_prompt") - except Exception: - system_prompt = None + has_system = any(m.role == "system" for m in messages) + system_prompt = config.completion_params.get("system_prompt") if system_prompt and not has_system: messages.insert(0, Message(role="system", content=system_prompt)) usage = { @@ -248,27 +255,30 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: } step_rewards = [] history: List[str] = [] - - print(f"[OpenEnvRolloutProcessor] Starting agent loop (max {max_steps} steps)", flush=True) - + + logger.info("[OpenEnvRolloutProcessor] Starting agent loop (max %d steps)", max_steps) + # Agent loop: model → action → env.step → repeat for step in range(max_steps): - print(f"\n[OpenEnvRolloutProcessor] === STEP {step + 1}/{max_steps} ===", flush=True) - + logger.debug("[OpenEnvRolloutProcessor] === STEP %d/%d ===", step + 1, max_steps) + if result.done: logger.info(f"Episode done after {step} steps") - print(f"[OpenEnvRolloutProcessor] Episode already done at step {step}", flush=True) + 
logger.info("[OpenEnvRolloutProcessor] Episode already done at step %d", step) break - + # Build user message content via user-provided prompt_builder try: - print(f"[OpenEnvRolloutProcessor] Building prompt...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Building prompt") user_content = self.prompt_builder(observation, step + 1, history) - print(f"[OpenEnvRolloutProcessor] Prompt built (len={len(str(user_content))})", flush=True) + logger.debug( + "[OpenEnvRolloutProcessor] Prompt built (len=%d)", + len(str(user_content)), + ) except Exception as e: logger.error(f"prompt_builder failed: {e}", exc_info=True) user_content = str(observation) - + messages.append(Message(role="user", content=user_content)) # Optional tracing if getattr(config, "logger", None): @@ -278,59 +288,76 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: row_for_log = row.model_copy(deep=True) # pydantic v2 except Exception: import copy as _copy + row_for_log = _copy.deepcopy(row) row_for_log.messages = list(messages) config.logger.log(row_for_log) except Exception: pass - + # Call model to generate action (LiteLLM handles multimodal!) - print(f"[OpenEnvRolloutProcessor] Calling LLM (messages={len(messages)})...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Calling LLM (messages=%d)", len(messages)) response = await policy._make_llm_call( messages=[msg.model_dump() for msg in messages], - tools=None, # No tools - just text generation + tools=[], # No tools - just text generation ) - print(f"[OpenEnvRolloutProcessor] LLM call completed", flush=True) - + logger.debug("[OpenEnvRolloutProcessor] LLM call completed") + # Update usage usage["prompt_tokens"] += response["usage"]["prompt_tokens"] usage["completion_tokens"] += response["usage"]["completion_tokens"] usage["total_tokens"] += response["usage"]["total_tokens"] - print(f"[OpenEnvRolloutProcessor] Tokens: prompt={response['usage']['prompt_tokens']}, completion={response['usage']['completion_tokens']}", flush=True) - + logger.debug( + "[OpenEnvRolloutProcessor] Tokens: prompt=%s, completion=%s", + response["usage"]["prompt_tokens"], + response["usage"]["completion_tokens"], + ) + # Extract assistant message and parse into Action object assistant_message = response["choices"][0]["message"]["content"] preview = assistant_message if isinstance(assistant_message, str) else str(assistant_message) - print(f"[OpenEnvRolloutProcessor] Model output: '{preview[:120] if preview else ''}'", flush=True) - - print(f"[OpenEnvRolloutProcessor] Parsing action...", flush=True) + logger.debug( + "[OpenEnvRolloutProcessor] Model output: '%s'", + preview[:120] if preview else "", + ) + + logger.debug("[OpenEnvRolloutProcessor] Parsing action") action = self.action_parser(assistant_message) label = getattr(action, "action_str", None) or str(action) - print(f"[OpenEnvRolloutProcessor] Parsed action: '{label[:120]}'", flush=True) - + logger.debug("[OpenEnvRolloutProcessor] Parsed action: '%s'", label[:120]) + # Add assistant message (original content) messages.append(Message(role="assistant", content=assistant_message)) - + # Execute action in environment (OpenEnv standard interface!) 
with transient-error retries - print(f"[OpenEnvRolloutProcessor] Executing action in environment...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Executing action in environment") step_attempts = 2 step_delay = 0.5 for si in range(step_attempts): try: result = env.step(action) - print(f"[OpenEnvRolloutProcessor] env.step() succeeded", flush=True) + logger.debug("[OpenEnvRolloutProcessor] env.step() succeeded") break except Exception as se: if si == step_attempts - 1: - print(f"[OpenEnvRolloutProcessor] env.step() failed after {step_attempts} attempts: {se}", flush=True) + logger.error( + "[OpenEnvRolloutProcessor] env.step() failed after %d attempts: %s", + step_attempts, + se, + ) raise time.sleep(step_delay) - + # Collect reward (OpenEnv standard: result.reward) reward = float(result.reward or 0.0) step_rewards.append(reward) - print(f"[OpenEnvRolloutProcessor] Step {step + 1}: reward={reward:.3f}, done={result.done}", flush=True) - + logger.debug( + "[OpenEnvRolloutProcessor] Step %d: reward=%.3f, done=%s", + step + 1, + reward, + result.done, + ) + _action_label = getattr(action, "action_str", None) if not _action_label: try: @@ -338,13 +365,15 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: except Exception: _action_label = "" logger.debug(f"Step {step}: action={_action_label}, reward={reward}") - + # Update observation (OpenEnv standard: result.observation) observation = result.observation - + # Update history for next prompt error_flag = getattr(observation, "last_action_error", False) - history_line = f"Step {step + 1}: {_action_label} -> reward {reward:+.2f}{' ERROR' if error_flag else ''}" + history_line = ( + f"Step {step + 1}: {_action_label} -> reward {reward:+.2f}{' ERROR' if error_flag else ''}" + ) history.append(history_line) # Optional tracing if getattr(config, "logger", None): @@ -354,12 +383,13 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: row_for_log = row.model_copy(deep=True) # pydantic v2 except Exception: import copy as _copy + row_for_log = _copy.deepcopy(row) row_for_log.messages = list(messages) config.logger.log(row_for_log) except Exception: pass - + # Update row with results row.messages = messages row.execution_metadata.usage = CompletionUsage( @@ -368,18 +398,21 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: total_tokens=usage["total_tokens"], ) row.execution_metadata.duration_seconds = time.perf_counter() - start_time - + # Store rewards for TRL reward functions via a system message sentinel sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) messages.append(Message(role="system", content=sentinel)) - + total_reward = sum(step_rewards) - print(f"\n[OpenEnvRolloutProcessor] ✅ ROLLOUT COMPLETE", flush=True) - print(f"[OpenEnvRolloutProcessor] Steps: {len(step_rewards)}", flush=True) - print(f"[OpenEnvRolloutProcessor] Total reward: {total_reward:.3f}", flush=True) - print(f"[OpenEnvRolloutProcessor] Duration: {row.execution_metadata.duration_seconds:.2f}s", flush=True) - print(f"[OpenEnvRolloutProcessor] Messages collected: {len(messages)}", flush=True) - + logger.info("[OpenEnvRolloutProcessor] ✅ ROLLOUT COMPLETE") + logger.info("[OpenEnvRolloutProcessor] Steps: %d", len(step_rewards)) + logger.info("[OpenEnvRolloutProcessor] Total reward: %.3f", total_reward) + logger.info( + "[OpenEnvRolloutProcessor] Duration: %.2fs", + row.execution_metadata.duration_seconds, + ) + logger.debug("[OpenEnvRolloutProcessor] Messages collected: %d", len(messages)) + logger.info( f"Rollout complete: 
{len(step_rewards)} steps, " f"total_reward={total_reward:.2f}, " @@ -391,36 +424,43 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: config.logger.log(row) except Exception: pass - + return row - + except Exception as e: logger.error(f"Error in rollout: {e}", exc_info=True) - print(f"\n[OpenEnvRolloutProcessor] ❌ ERROR in rollout: {type(e).__name__}: {e}", flush=True) + logger.error( + "[OpenEnvRolloutProcessor] ❌ ERROR in rollout: %s: %s", + type(e).__name__, + e, + ) raise finally: # Cleanup environment - print("[OpenEnvRolloutProcessor] Closing environment client...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Closing environment client") try: env.close() - print("[OpenEnvRolloutProcessor] Environment closed successfully.", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Environment closed successfully") except Exception as close_err: - print(f"[OpenEnvRolloutProcessor] Error closing environment: {close_err}", flush=True) - + logger.warning( + "[OpenEnvRolloutProcessor] Error closing environment: %s", + close_err, + ) + async def _sem_wrapper(r: EvaluationRow) -> EvaluationRow: async with semaphore: return await process_row(r) - + # Create and return tasks - print(f"[OpenEnvRolloutProcessor] Creating {len(rows)} async tasks...", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Creating %d async tasks", len(rows)) tasks = [asyncio.create_task(_sem_wrapper(row)) for row in rows] - print(f"[OpenEnvRolloutProcessor] Returning {len(tasks)} tasks", flush=True) + logger.debug("[OpenEnvRolloutProcessor] Returning %d tasks", len(tasks)) return tasks def _build_prompt(self, observation_text: str, step: int) -> str: """ Build prompt for LLM from observation text. - + Generic prompt that works for any environment. """ return ( @@ -443,12 +483,13 @@ def _build_env_factory(self) -> Callable[[], Any]: # If a generic client class is provided, use it if self._env_client_cls is not None: + def _generic_factory(): if self._env_base_url: - try: - print(f"[OpenEnvRolloutProcessor] Using env_client_cls base_url={self._env_base_url}") - except Exception: - pass + logger.debug( + "[OpenEnvRolloutProcessor] Using env_client_cls base_url=%s", + self._env_base_url, + ) return self._env_client_cls( # type: ignore[call-arg] base_url=self._env_base_url, request_timeout_s=self._request_timeout_s, @@ -460,69 +501,53 @@ def _generic_factory(): # ------------------------------ docker_kwargs: Dict[str, Any] = {} - # 1) Build default BrowserGym-style env vars - env_vars_default: Dict[str, str] = { - "BROWSERGYM_BENCHMARK": str(self._benchmark), - "BROWSERGYM_HEADLESS": "true" if self._headless else "false", - "BROWSERGYM_VIEWPORT_WIDTH": str(self._viewport_width), - "BROWSERGYM_VIEWPORT_HEIGHT": str(self._viewport_height), - "BROWSERGYM_TIMEOUT": str(int(self._timeout_ms)), - # Keep obs/info flags consistent for BrowserGym - "BROWSERGYM_OBS_AXTREE": "1", - "BROWSERGYM_OBS_PRUNED_HTML": "1", - "BROWSERGYM_RETURN_INFO": "1", - } + env_vars_default: Dict[str, str] = dict(self._env_vars) - # 2) Select task for this env instance (if provided), grouped by num_generations + # Select task for this env instance (if provided), grouped by num_generations selected_task: Optional[str] = None if self._tasks: idx = self._env_create_idx self._env_create_idx = idx + 1 group = idx // max(1, self._num_generations) selected_task = self._tasks[group % len(self._tasks)] - env_vars_default["BROWSERGYM_TASK_NAME"] = str(selected_task) - try: - print( - "[OpenEnvRolloutProcessor] Task selection:" - f" 
idx={idx}, group={group}, num_generations={self._num_generations}," - f" selected_task={selected_task}, tasks={self._tasks}", - flush=True, - ) - except Exception: - pass - - # 3) MiniWoB URL (if provided) - if self._miniwob_url: - env_vars_default["MINIWOB_URL"] = str(self._miniwob_url) - - # 4) Merge user-provided env vars (override defaults) - if self._env_vars: - env_vars_default.update({k: str(v) for k, v in self._env_vars.items()}) + if not self._task_var: + raise ValueError("task_var must be provided when tasks are configured.") + env_vars_default[self._task_var] = str(selected_task) + logger.debug( + "[OpenEnvRolloutProcessor] Task selection: idx=%d, group=%d, num_generations=%d, selected_task=%s, tasks=%s", + idx, + group, + self._num_generations, + selected_task, + self._tasks, + ) - docker_kwargs["env_vars"] = env_vars_default + if env_vars_default: + docker_kwargs["env_vars"] = env_vars_default if self._docker_port is not None: docker_kwargs["port"] = int(self._docker_port) if self._hub_repo_id: - try: - print(f"[OpenEnvRolloutProcessor] Launching from_hub repo_id='{self._hub_repo_id}' ...") - except Exception: - pass + logger.debug( + "[OpenEnvRolloutProcessor] Launching from_hub repo_id='%s' ...", + self._hub_repo_id, + ) return self._env_client_cls.from_hub( # type: ignore[attr-defined] self._hub_repo_id, provider=self._provider, **docker_kwargs, ) else: - try: - print(f"[OpenEnvRolloutProcessor] Launching from_docker_image image='{self._docker_image}' ...") - except Exception: - pass + logger.debug( + "[OpenEnvRolloutProcessor] Launching from_docker_image image='%s' ...", + self._docker_image, + ) return self._env_client_cls.from_docker_image( # type: ignore[attr-defined] self._docker_image, provider=self._provider, **docker_kwargs, ) + return _generic_factory # No fallback: require an env_factory or env_client_cls diff --git a/pyproject.toml b/pyproject.toml index e13e813e..bf8623be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,6 +130,11 @@ langchain = [ braintrust = [ "braintrust[otel]", ] +openenv = [ + "openenv-core", + "openenv @ git+https://github.com/meta-pytorch/OpenEnv.git", + "openenv-browsergym-env @ git+https://github.com/meta-pytorch/OpenEnv.git#subdirectory=src/envs/browsergym_env", +] # Optional deps for LangGraph example/tests langgraph = [ diff --git a/tests/pytest/test_openenv_browsergym_basic.py b/tests/pytest/test_openenv_browsergym_basic.py index 75de7643..f87a663b 100644 --- a/tests/pytest/test_openenv_browsergym_basic.py +++ b/tests/pytest/test_openenv_browsergym_basic.py @@ -28,21 +28,36 @@ def test_openenv_browsergym_basic(): # Use tasks that are known to exist; requires MiniWoB server reachable from containers. tasks = ["click-test"] miniwob_url = os.getenv("MINIWOB_URL", "http://172.17.0.1:8888/miniwob/") + browsergym_env_vars: Dict[str, str] = { + "BROWSERGYM_BENCHMARK": "miniwob", + "BROWSERGYM_HEADLESS": "true", + "BROWSERGYM_VIEWPORT_WIDTH": "1280", + "BROWSERGYM_VIEWPORT_HEIGHT": "720", + "BROWSERGYM_TIMEOUT": "10000", + "BROWSERGYM_OBS_AXTREE": "1", + "BROWSERGYM_OBS_PRUNED_HTML": "1", + "BROWSERGYM_RETURN_INFO": "1", + } + if miniwob_url: + browsergym_env_vars["MINIWOB_URL"] = miniwob_url # Construct the processor with a trivial action_parser; the model output will still be generated # but we parse to a safe noop action to minimize flakiness for the environment step. 
- from envs.browsergym_env import BrowserGymAction # type: ignore + from envs.browsergym_env import BrowserGymAction, BrowserGymEnv # type: ignore processor = OpenEnvRolloutProcessor( env_factory=None, prompt_builder=lambda obs, step, history: "Do nothing", action_parser=lambda text: BrowserGymAction(action_str="noop()"), + env_client_cls=BrowserGymEnv, tasks=tasks, + task_var="BROWSERGYM_TASK_NAME", miniwob_url=miniwob_url, docker_image="browsergym-env:latest", benchmark="miniwob", timeout_ms=10000, num_generations=1, + env_vars=browsergym_env_vars, ) # Completion params: rely on an available provider/model in the environment @@ -61,11 +76,13 @@ def test_openenv_browsergym_basic(): completion_params=completion_params, semaphore=asyncio.Semaphore(1), steps=1, + mcp_config_path="", ) loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: + async def _run_all(): tasks_ = processor(rows, config) return await asyncio.gather(*tasks_) @@ -80,4 +97,3 @@ async def _run_all(): assert row is not None assert row.execution_metadata is not None assert getattr(row.execution_metadata, "duration_seconds", 0.0) >= 0.0 - diff --git a/tests/pytest/test_openenv_browsergym_eval.py b/tests/pytest/test_openenv_browsergym_eval.py index 0de81e20..ac2c183f 100644 --- a/tests/pytest/test_openenv_browsergym_eval.py +++ b/tests/pytest/test_openenv_browsergym_eval.py @@ -6,7 +6,6 @@ from eval_protocol.models import EvaluationRow, Message, EvaluateResult from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor -import pytest # Skip these integration-heavy tests on CI runners by default pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") @@ -89,7 +88,7 @@ def _extract_clickable_elements_lines(observation: Any) -> List[str]: bbox_str = ", ".join(str(v) for v in bbox) if bbox else "?" role, name = bid_to_desc.get(str(bid), ("", "")) focus_tag = " [FOCUSED]" if (str(bid) == str(focused_bid)) else "" - rn = (role or "-") + rn = role or "-" if name: rn = f"{rn} | {name}" vis = props.get("visibility") @@ -145,7 +144,7 @@ def _rank_clickables_lines(observation: Any, goal: str, top_n: int = 8) -> tuple pass bbox = props.get("bbox") or [] bbox_str = ", ".join(str(v) for v in bbox) if bbox else "?" - rn = (role or "-") + rn = role or "-" if name: rn = f"{rn} | {name}" vis_str = f"{vis:.2f}" if isinstance(vis, (int, float)) else str(vis) if vis is not None else "?" 
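For reference, a minimal helper for pulling per-step rewards back out of the `__ep_step_rewards__` sentinel that the processor appends (a sketch that mirrors the parsing done in the evaluation further down; the helper name is illustrative and not part of this patch):

```python
import json
from typing import List

from eval_protocol.models import EvaluationRow


def extract_step_rewards(row: EvaluationRow) -> List[float]:
    """Return the step rewards encoded in the sentinel system message, if any."""
    prefix = "__ep_step_rewards__:"
    for msg in row.messages or []:
        if msg.role == "system" and isinstance(msg.content, str) and msg.content.startswith(prefix):
            # Sentinel payload is the JSON-encoded list written by the rollout processor.
            return json.loads(msg.content[len(prefix):]) or []
    return []
```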
@@ -217,18 +216,23 @@ def action_parser(response_text: str): try: from envs.browsergym_env import BrowserGymEnv # type: ignore + _HAS_BG = True except Exception: _HAS_BG = False +OPENENV_BROWSERGYM_INLINE_DATA: List[Dict[str, Any]] = [ + {"id": "click-test", "prompt": "start"}, +] + + @evaluation_test( # type: ignore[misc] - input_dataset=["tests/pytest/data/openenv_browsergym_dataset.jsonl"], - dataset_adapter=openenv_dataset_to_rows, + input_rows=[openenv_dataset_to_rows(OPENENV_BROWSERGYM_INLINE_DATA)], completion_params=[ { "temperature": 0.0, - "max_tokens": 32, + "max_tokens": 512, "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", } ], @@ -241,20 +245,24 @@ def action_parser(response_text: str): env_client_cls=BrowserGymEnv if _HAS_BG else None, prompt_builder=prompt_builder, action_parser=action_parser, - tasks=[ - "click-test", - "click-button", - "click-button-sequence", - "click-checkboxes", - "click-checkboxes-soft", - "click-checkboxes-large", - "click-checkboxes-transfer", - ], - miniwob_url=os.getenv("MINIWOB_URL", "http://172.17.0.1:8888/miniwob/"), + tasks=["click-test"], + task_var="BROWSERGYM_TASK_NAME", + miniwob_url=os.getenv("MINIWOB_URL", "http://host.docker.internal:8888/miniwob/"), docker_image="browsergym-env:latest", benchmark="miniwob", timeout_ms=10000, num_generations=1, + env_vars={ + "BROWSERGYM_BENCHMARK": "miniwob", + "BROWSERGYM_HEADLESS": "true", + "BROWSERGYM_VIEWPORT_WIDTH": "1280", + "BROWSERGYM_VIEWPORT_HEIGHT": "720", + "BROWSERGYM_TIMEOUT": "10000", + "BROWSERGYM_OBS_AXTREE": "1", + "BROWSERGYM_OBS_PRUNED_HTML": "1", + "BROWSERGYM_RETURN_INFO": "1", + "MINIWOB_URL": os.getenv("MINIWOB_URL", "http://host.docker.internal:8888/miniwob/"), + }, ) if _HAS_BG else None @@ -271,8 +279,13 @@ def test_openenv_browsergym_eval(row: EvaluationRow) -> EvaluationRow: step_rewards: List[float] = [] try: for msg in row.messages or []: - if msg.role == "system" and isinstance(msg.content, str) and msg.content.startswith("__ep_step_rewards__:"): + if ( + msg.role == "system" + and isinstance(msg.content, str) + and msg.content.startswith("__ep_step_rewards__:") + ): import json as _json + payload = msg.content.split(":", 1)[1] step_rewards = _json.loads(payload) or [] break @@ -285,4 +298,3 @@ def test_openenv_browsergym_eval(row: EvaluationRow) -> EvaluationRow: reason = f"Total reward={total:.2f} across {len(step_rewards)} steps" row.evaluation_result = EvaluateResult(score=score, reason=reason) return row - diff --git a/uv.lock b/uv.lock index e5f6e6dc..d8e6954d 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10" resolution-markers = [ "python_full_version >= '3.13'", @@ -162,6 +162,15 @@ speedups = [ { name = "brotlicffi", marker = "platform_python_implementation != 'CPython'" }, ] +[[package]] +name = "aiolimiter" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/23/b52debf471f7a1e42e362d959a3982bdcb4fe13a5d46e63d28868807a79c/aiolimiter-1.2.1.tar.gz", hash = "sha256:e02a37ea1a855d9e832252a105420ad4d15011505512a1a1d814647451b5cca9", size = 7185, upload-time = "2024-12-08T15:31:51.496Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/ba/df6e8e1045aebc4778d19b8a3a9bc1808adb1619ba94ca354d9ba17d86c3/aiolimiter-1.2.1-py3-none-any.whl", hash = "sha256:d3f249e9059a20badcb56b61601a83556133655c11d1eb3dd3e04ff069e5f3c7", size = 6711, upload-time = "2024-12-08T15:31:49.874Z" }, +] + 
[[package]] name = "aiosignal" version = "1.4.0" @@ -407,6 +416,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/e5/edf168b8dd936bb82a97ebb76e7295c94a4f9d1c2e8e8a04696ef2b3a524/basedpyright-1.31.3-py3-none-any.whl", hash = "sha256:bdb0b5a9abe287a023d330fc71eaed181aaffd48f1dec59567f912cf716f38ff", size = 11722347, upload-time = "2025-08-20T15:08:20.528Z" }, ] +[[package]] +name = "beartype" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/78/bf7a20712dbf3a18e37f9ae1c2a1e04169c43b4628871f40f1db5f29c257/beartype-0.12.0.tar.gz", hash = "sha256:3b7545b3f333a6b07042b68b102141554c9add2e979dab7b0f8ed6378f7af7d7", size = 1029204, upload-time = "2023-01-17T08:36:34.367Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/1f/d066e5689e91e3c8845d275d85d25b23f2c7c78c9523d852b4b73be73875/beartype-0.12.0-py3-none-any.whl", hash = "sha256:3d9d5bec198bcf965c000d7b5120bebdd19a444ef6e39e97d0e93eb8832e10c8", size = 754537, upload-time = "2023-01-17T08:36:27.714Z" }, +] + [[package]] name = "beautifulsoup4" version = "4.13.4" @@ -457,6 +475,15 @@ css = [ { name = "tinycss2" }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, +] + [[package]] name = "boto3" version = "1.40.17" @@ -610,6 +637,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/a9/0971251c4427c14b2a827dba3d910d4d3330dabf23d4278bf6d06a978847/brotlicffi-1.1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce01c7316aebc7fce59da734286148b1d1b9455f89cf2c8a4dfce7d41db55c2d", size = 361760, upload-time = "2023-09-14T14:22:14.767Z" }, ] +[[package]] +name = "browsergym-core" +version = "0.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "gymnasium" }, + { name = "lxml" }, + { name = "mcp", extra = ["cli"] }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "playwright" }, + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/14/3c/c2e801c650671b8b23e54fd15d1146136cdb1a8ed0798f7d833b887403fc/browsergym_core-0.14.2.tar.gz", hash = "sha256:aa99a56aa6aae74bb3e1c139ae2fe7d53f0a5bed8707e0ee7520daed531f1f52", size = 184589, upload-time = "2025-08-05T18:28:07.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/06/9d9760d3215ca2980a5952d939dbb210472e73613286b5a756c140709b3b/browsergym_core-0.14.2-py3-none-any.whl", hash = "sha256:217dfae3d8f6a92e4502b4dfd97dc5ec955a91e5f6b45944f857c182a57168d0", size = 191590, upload-time = "2025-08-05T18:27:59.065Z" }, +] + +[[package]] +name = "browsergym-miniwob" +version = "0.14.2" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "browsergym-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/9e/609312f140cbad18c24b6b32bfd4306c226912017ebacc3c01d0156531dc/browsergym_miniwob-0.14.2.tar.gz", hash = "sha256:00ea1f820124689f086830323ea610fec5207e7f1718c86d1fc69e0eb385d939", size = 9318, upload-time = "2025-08-05T18:28:10.625Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/e7/ffab662108aa12e3691b9dc096ae9d4c96daf7f6e8f45280f853a07fda7e/browsergym_miniwob-0.14.2-py3-none-any.whl", hash = "sha256:bc99712c11e39d46c11c5431d57a121854f141291ab16d62e329a1dca0cea974", size = 9526, upload-time = "2025-08-05T18:28:01.628Z" }, +] + +[[package]] +name = "browsergym-webarena" +version = "0.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "browsergym-core" }, + { name = "libwebarena" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1a/7a/65398abd7e2afac58d240d33b461b662619fa05a8a5a4922766cdcd13e5b/browsergym_webarena-0.14.2.tar.gz", hash = "sha256:ccc741ea6a6d4e0d4022fc3c0e7c50d2ee7edc2076a3c50b277005eb572f4c65", size = 7755, upload-time = "2025-08-05T18:28:12.944Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/e8/cb96c163b3493196dae8b3920af7fb75fb2f02d82431623ae9534c0e9070/browsergym_webarena-0.14.2-py3-none-any.whl", hash = "sha256:d9bd8fb4e64627a57134fe205497aa36c5e39ffcafd255b8511ba31983478cff", size = 8093, upload-time = "2025-08-05T18:28:03.983Z" }, +] + [[package]] name = "build" version = "1.2.2.post1" @@ -1310,6 +1382,11 @@ langgraph-tools = [ langsmith = [ { name = "langsmith" }, ] +openenv = [ + { name = "openenv" }, + { name = "openenv-browsergym-env" }, + { name = "openenv-core" }, +] openevals = [ { name = "openevals" }, ] @@ -1386,6 +1463,9 @@ requires-dist = [ { name = "omegaconf", specifier = ">=2.3.0" }, { name = "openai", specifier = ">=1.78.1" }, { name = "openai", marker = "extra == 'dev'", specifier = ">=1.78.1" }, + { name = "openenv", marker = "extra == 'openenv'", git = "https://github.com/meta-pytorch/OpenEnv.git" }, + { name = "openenv-browsergym-env", marker = "extra == 'openenv'", git = "https://github.com/meta-pytorch/OpenEnv.git?subdirectory=src%2Fenvs%2Fbrowsergym_env" }, + { name = "openenv-core", marker = "extra == 'openenv'" }, { name = "openevals", marker = "extra == 'openevals'", specifier = ">=0.1.0" }, { name = "pandas", marker = "extra == 'dev'", specifier = ">=1.5.0" }, { name = "peewee", specifier = ">=3.18.2" }, @@ -1430,7 +1510,7 @@ requires-dist = [ { name = "websockets", specifier = ">=15.0.1" }, { name = "werkzeug", marker = "extra == 'dev'", specifier = ">=2.0.0" }, ] -provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "langsmith", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain", "braintrust", "langgraph", "langgraph-tools", "proxy"] +provides-extras = ["dev", "trl", "openevals", "fireworks", "box2d", "langfuse", "huggingface", "langsmith", "bigquery", "svgbench", "pydantic", "supabase", "chinook", "langchain", "braintrust", "openenv", "langgraph", "langgraph-tools", "proxy"] [package.metadata.requires-dev] dev = [ @@ -1450,6 +1530,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = 
"2024-12-21T20:09:44.175Z" }, ] +[[package]] +name = "evaluate" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "datasets" }, + { name = "dill" }, + { name = "fsspec", extra = ["http"] }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/d0/0c17a8e6e8dc7245f22dea860557c32bae50fc4d287ae030cb0e8ab8720f/evaluate-0.4.6.tar.gz", hash = "sha256:e07036ca12b3c24331f83ab787f21cc2dbf3631813a1631e63e40897c69a3f21", size = 65716, upload-time = "2025-09-18T13:06:30.581Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/af/3e990d8d4002bbc9342adb4facd59506e653da93b2417de0fa6027cb86b1/evaluate-0.4.6-py3-none-any.whl", hash = "sha256:bca85bc294f338377b7ac2f861e21c308b11b2a285f510d7d5394d5df437db29", size = 84069, upload-time = "2025-09-18T13:06:29.265Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -1655,6 +1758,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/3c/ea7615940131402570b1fdf79472c1328ce71ca17b60ab16ae8ead7ef53d/fireworks_ai-0.19.19-py3-none-any.whl", hash = "sha256:a375304c4e1fa8f2e8d32b8edf53bdc4eb9f55cd0e9085c0866e479aaa1880a1", size = 570660, upload-time = "2025-09-09T22:08:49.387Z" }, ] +[[package]] +name = "flask" +version = "3.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "blinker" }, + { name = "click" }, + { name = "itsdangerous" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" }, +] + [[package]] name = "fqdn" version = "1.5.1" @@ -1957,53 +2077,37 @@ wheels = [ [[package]] name = "greenlet" -version = "3.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752, upload-time = "2025-06-05T16:16:09.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/92/db/b4c12cff13ebac2786f4f217f06588bccd8b53d260453404ef22b121fc3a/greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be", size = 268977, upload-time = "2025-06-05T16:10:24.001Z" }, - { url = "https://files.pythonhosted.org/packages/52/61/75b4abd8147f13f70986df2801bf93735c1bd87ea780d70e3b3ecda8c165/greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac", size = 627351, upload-time = "2025-06-05T16:38:50.685Z" }, - { url = "https://files.pythonhosted.org/packages/35/aa/6894ae299d059d26254779a5088632874b80ee8cf89a88bca00b0709d22f/greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392", size = 638599, upload-time = "2025-06-05T16:41:34.057Z" }, - { url = "https://files.pythonhosted.org/packages/30/64/e01a8261d13c47f3c082519a5e9dbf9e143cc0498ed20c911d04e54d526c/greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c", size = 634482, upload-time = "2025-06-05T16:48:16.26Z" }, - { url = "https://files.pythonhosted.org/packages/47/48/ff9ca8ba9772d083a4f5221f7b4f0ebe8978131a9ae0909cf202f94cd879/greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db", size = 633284, upload-time = "2025-06-05T16:13:01.599Z" }, - { url = "https://files.pythonhosted.org/packages/e9/45/626e974948713bc15775b696adb3eb0bd708bec267d6d2d5c47bb47a6119/greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b", size = 582206, upload-time = "2025-06-05T16:12:48.51Z" }, - { url = "https://files.pythonhosted.org/packages/b1/8e/8b6f42c67d5df7db35b8c55c9a850ea045219741bb14416255616808c690/greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712", size = 1111412, upload-time = "2025-06-05T16:36:45.479Z" }, - { url = "https://files.pythonhosted.org/packages/05/46/ab58828217349500a7ebb81159d52ca357da747ff1797c29c6023d79d798/greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00", size = 1135054, upload-time = "2025-06-05T16:12:36.478Z" }, - { url = "https://files.pythonhosted.org/packages/68/7f/d1b537be5080721c0f0089a8447d4ef72839039cdb743bdd8ffd23046e9a/greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302", size = 296573, upload-time = "2025-06-05T16:34:26.521Z" }, - { url = "https://files.pythonhosted.org/packages/fc/2e/d4fcb2978f826358b673f779f78fa8a32ee37df11920dc2bb5589cbeecef/greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822", size = 270219, upload-time = "2025-06-05T16:10:10.414Z" }, - { url = "https://files.pythonhosted.org/packages/16/24/929f853e0202130e4fe163bc1d05a671ce8dcd604f790e14896adac43a52/greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83", size = 630383, upload-time = "2025-06-05T16:38:51.785Z" }, - { url = "https://files.pythonhosted.org/packages/d1/b2/0320715eb61ae70c25ceca2f1d5ae620477d246692d9cc284c13242ec31c/greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf", size = 642422, upload-time = "2025-06-05T16:41:35.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/bd/49/445fd1a210f4747fedf77615d941444349c6a3a4a1135bba9701337cd966/greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b", size = 638375, upload-time = "2025-06-05T16:48:18.235Z" }, - { url = "https://files.pythonhosted.org/packages/7e/c8/ca19760cf6eae75fa8dc32b487e963d863b3ee04a7637da77b616703bc37/greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147", size = 637627, upload-time = "2025-06-05T16:13:02.858Z" }, - { url = "https://files.pythonhosted.org/packages/65/89/77acf9e3da38e9bcfca881e43b02ed467c1dedc387021fc4d9bd9928afb8/greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5", size = 585502, upload-time = "2025-06-05T16:12:49.642Z" }, - { url = "https://files.pythonhosted.org/packages/97/c6/ae244d7c95b23b7130136e07a9cc5aadd60d59b5951180dc7dc7e8edaba7/greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc", size = 1114498, upload-time = "2025-06-05T16:36:46.598Z" }, - { url = "https://files.pythonhosted.org/packages/89/5f/b16dec0cbfd3070658e0d744487919740c6d45eb90946f6787689a7efbce/greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba", size = 1139977, upload-time = "2025-06-05T16:12:38.262Z" }, - { url = "https://files.pythonhosted.org/packages/66/77/d48fb441b5a71125bcac042fc5b1494c806ccb9a1432ecaa421e72157f77/greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34", size = 297017, upload-time = "2025-06-05T16:25:05.225Z" }, - { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, - { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, - { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" }, - { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, - { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = 
"2025-06-05T16:13:04.628Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, - { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163", size = 1149055, upload-time = "2025-06-05T16:12:40.457Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849", size = 297817, upload-time = "2025-06-05T16:29:49.244Z" }, - { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, - { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, - { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, - { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, - { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, - { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, - { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 
1119577, upload-time = "2025-06-05T16:36:49.787Z" }, - { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121, upload-time = "2025-06-05T16:12:42.527Z" }, - { url = "https://files.pythonhosted.org/packages/27/1a/199f9587e8cb08a0658f9c30f3799244307614148ffe8b1e3aa22f324dea/greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3", size = 297603, upload-time = "2025-06-05T16:20:12.651Z" }, - { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, - { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, - { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, - { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, - { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, - { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/17/14/3bddb1298b9a6786539ac609ba4b7c9c0842e12aa73aaa4d8d73ec8f8185/greenlet-3.0.3.tar.gz", hash = "sha256:43374442353259554ce33599da8b692d5aa96f8976d567d4badf263371fbe491", size = 182013, upload-time = "2023-12-21T22:02:54.659Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/64/bea53c592e3e45799f7c8039a8ee7d6883c518eafef1fcae60beb776070f/greenlet-3.0.3-cp310-cp310-macosx_11_0_universal2.whl", hash = 
"sha256:9da2bd29ed9e4f15955dd1595ad7bc9320308a3b766ef7f837e23ad4b4aac31a", size = 270098, upload-time = "2023-12-21T22:01:50.6Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d6/408ad9603339db28ce334021b1403dfcfbcb7501a435d49698408d928de7/greenlet-3.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d353cadd6083fdb056bb46ed07e4340b0869c305c8ca54ef9da3421acbdf6881", size = 651930, upload-time = "2023-12-21T22:29:24.374Z" }, + { url = "https://files.pythonhosted.org/packages/6c/90/5b14670653f7363fb3e1665f8da6d64bd4c31d53a796d09ef69f48be7273/greenlet-3.0.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dca1e2f3ca00b84a396bc1bce13dd21f680f035314d2379c4160c98153b2059b", size = 667643, upload-time = "2023-12-21T22:26:19.296Z" }, + { url = "https://files.pythonhosted.org/packages/ef/17/e8e72cabfb5a906c0d976d7fbcc88310df292beea0f816efbefdaf694284/greenlet-3.0.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ed7fb269f15dc662787f4119ec300ad0702fa1b19d2135a37c2c4de6fadfd4a", size = 659188, upload-time = "2023-12-21T22:31:32.77Z" }, + { url = "https://files.pythonhosted.org/packages/1c/2f/64628f6ae48e05f585e0eb3fb7399b52e240ef99f602107b445bf6be23ef/greenlet-3.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd4f49ae60e10adbc94b45c0b5e6a179acc1736cf7a90160b404076ee283cf83", size = 662673, upload-time = "2023-12-21T22:03:11.143Z" }, + { url = "https://files.pythonhosted.org/packages/24/35/945d5b10648fec9b20bcc6df8952d20bb3bba76413cd71c1fdbee98f5616/greenlet-3.0.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:73a411ef564e0e097dbe7e866bb2dda0f027e072b04da387282b02c308807405", size = 616002, upload-time = "2023-12-21T22:03:01.774Z" }, + { url = "https://files.pythonhosted.org/packages/74/00/27e2da76b926e9b5a2c97d3f4c0baf1b7d8181209d3026c0171f621ae6c0/greenlet-3.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:7f362975f2d179f9e26928c5b517524e89dd48530a0202570d55ad6ca5d8a56f", size = 1150603, upload-time = "2023-12-21T22:30:55.699Z" }, + { url = "https://files.pythonhosted.org/packages/e1/65/506e0a80931170b0dac1a03d36b7fc299f3fa3576235b916718602fff2c3/greenlet-3.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:649dde7de1a5eceb258f9cb00bdf50e978c9db1b996964cd80703614c86495eb", size = 1176756, upload-time = "2023-12-21T22:04:16.172Z" }, + { url = "https://files.pythonhosted.org/packages/a6/76/e1ee9f290bb0d46b09704c2fb0e609cae329eb308ad404c0ee6fa1ecb8a5/greenlet-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:68834da854554926fbedd38c76e60c4a2e3198c6fbed520b106a8986445caaf9", size = 292349, upload-time = "2023-12-21T22:33:13.902Z" }, + { url = "https://files.pythonhosted.org/packages/6e/20/68a278a6f93fa36e21cfc3d7599399a8a831225644eb3b6b18755cd3d6fc/greenlet-3.0.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:b1b5667cced97081bf57b8fa1d6bfca67814b0afd38208d52538316e9422fc61", size = 271666, upload-time = "2023-12-21T22:02:20.66Z" }, + { url = "https://files.pythonhosted.org/packages/21/b4/90e06e07c78513ab03855768200bdb35c8e764e805b3f14fb488e56f82dc/greenlet-3.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52f59dd9c96ad2fc0d5724107444f76eb20aaccb675bf825df6435acb7703559", size = 657689, upload-time = "2023-12-21T22:29:26.823Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/a2/0ed21078039072f9dc738bbf3af12b103a84106b1385ac4723841f846ce7/greenlet-3.0.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:afaff6cf5200befd5cec055b07d1c0a5a06c040fe5ad148abcd11ba6ab9b114e", size = 673009, upload-time = "2023-12-21T22:26:21.802Z" }, + { url = "https://files.pythonhosted.org/packages/42/11/42ad6b1104c357826bbee7d7b9e4f24dbd9fde94899a03efb004aab62963/greenlet-3.0.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe754d231288e1e64323cfad462fcee8f0288654c10bdf4f603a39ed923bef33", size = 667432, upload-time = "2023-12-21T22:31:34.29Z" }, + { url = "https://files.pythonhosted.org/packages/bb/6b/384dee7e0121cbd1757bdc1824a5ee28e43d8d4e3f99aa59521f629442fe/greenlet-3.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2797aa5aedac23af156bbb5a6aa2cd3427ada2972c828244eb7d1b9255846379", size = 667442, upload-time = "2023-12-21T22:03:13.273Z" }, + { url = "https://files.pythonhosted.org/packages/c6/1f/12d5a6cc26e8b483c2e7975f9c22e088ac735c0d8dcb8a8f72d31a4e5f04/greenlet-3.0.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7f009caad047246ed379e1c4dbcb8b020f0a390667ea74d2387be2998f58a22", size = 620032, upload-time = "2023-12-21T22:03:04.267Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ec/85b647e59e0f137c7792a809156f413e38379cf7f3f2e1353c37f4be4026/greenlet-3.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c5e1536de2aad7bf62e27baf79225d0d64360d4168cf2e6becb91baf1ed074f3", size = 1154218, upload-time = "2023-12-21T22:30:57.945Z" }, + { url = "https://files.pythonhosted.org/packages/94/ed/1e5f4bca691a81700e5a88e86d6f0e538acb10188cd2cc17140e523255ef/greenlet-3.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:894393ce10ceac937e56ec00bb71c4c2f8209ad516e96033e4b3b1de270e200d", size = 1180754, upload-time = "2023-12-21T22:04:18.344Z" }, + { url = "https://files.pythonhosted.org/packages/47/79/26d54d7d700ef65b689fc2665a40846d13e834da0486674a8d4f0f371a47/greenlet-3.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:1ea188d4f49089fc6fb283845ab18a2518d279c7cd9da1065d7a84e991748728", size = 292822, upload-time = "2023-12-21T22:29:32.85Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2f/461615adc53ba81e99471303b15ac6b2a6daa8d2a0f7f77fd15605e16d5b/greenlet-3.0.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:70fb482fdf2c707765ab5f0b6655e9cfcf3780d8d87355a063547b41177599be", size = 273085, upload-time = "2023-12-21T22:03:01.176Z" }, + { url = "https://files.pythonhosted.org/packages/e9/55/2c3cfa3cdbb940cf7321fbcf544f0e9c74898eed43bf678abf416812d132/greenlet-3.0.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4d1ac74f5c0c0524e4a24335350edad7e5f03b9532da7ea4d3c54d527784f2e", size = 660514, upload-time = "2023-12-21T22:29:28.62Z" }, + { url = "https://files.pythonhosted.org/packages/38/77/efb21ab402651896c74f24a172eb4d7479f9f53898bd5e56b9e20bb24ffd/greenlet-3.0.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149e94a2dd82d19838fe4b2259f1b6b9957d5ba1b25640d2380bea9c5df37676", size = 674295, upload-time = "2023-12-21T22:26:24.101Z" }, + { url = "https://files.pythonhosted.org/packages/74/3a/92f188ace0190f0066dca3636cf1b09481d0854c46e92ec5e29c7cefe5b1/greenlet-3.0.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15d79dd26056573940fcb8c7413d84118086f2ec1a8acdfa854631084393efcc", size = 669395, upload-time = 
"2023-12-21T22:31:35.992Z" }, + { url = "https://files.pythonhosted.org/packages/63/0f/847ed02cdfce10f0e6e3425cd054296bddb11a17ef1b34681fa01a055187/greenlet-3.0.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b7db1ebff4ba09aaaeae6aa491daeb226c8150fc20e836ad00041bcb11230", size = 670455, upload-time = "2023-12-21T22:03:16.291Z" }, + { url = "https://files.pythonhosted.org/packages/bd/37/56b0da468a85e7704f3b2bc045015301bdf4be2184a44868c71f6dca6fe2/greenlet-3.0.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fcd2469d6a2cf298f198f0487e0a5b1a47a42ca0fa4dfd1b6862c999f018ebbf", size = 625692, upload-time = "2023-12-21T22:03:06.294Z" }, + { url = "https://files.pythonhosted.org/packages/7c/68/b5f4084c0a252d7e9c0d95fc1cfc845d08622037adb74e05be3a49831186/greenlet-3.0.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1f672519db1796ca0d8753f9e78ec02355e862d0998193038c7073045899f305", size = 1152597, upload-time = "2023-12-21T22:31:00.412Z" }, + { url = "https://files.pythonhosted.org/packages/a4/fa/31e22345518adcd69d1d6ab5087a12c178aa7f3c51103f6d5d702199d243/greenlet-3.0.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2516a9957eed41dd8f1ec0c604f1cdc86758b587d964668b5b196a9db5bfcde6", size = 1181043, upload-time = "2023-12-21T22:04:20.032Z" }, + { url = "https://files.pythonhosted.org/packages/53/80/3d94d5999b4179d91bcc93745d1b0815b073d61be79dd546b840d17adb18/greenlet-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:bba5387a6975598857d86de9eac14210a49d554a77eb8261cc68b7d082f78ce2", size = 293635, upload-time = "2023-12-21T22:26:01.555Z" }, ] [[package]] @@ -2509,6 +2613,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321, upload-time = "2020-11-01T10:59:58.02Z" }, ] +[[package]] +name = "itsdangerous" +version = "2.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410, upload-time = "2024-04-16T21:28:15.614Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234, upload-time = "2024-04-16T21:28:14.499Z" }, +] + [[package]] name = "jaconv" version = "0.4.0" @@ -2665,6 +2778,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, ] +[[package]] +name = "joblib" +version = "1.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/5d/447af5ea094b9e4c4054f82e223ada074c552335b9b4b2d14bd9b35a67c4/joblib-1.5.2.tar.gz", hash = "sha256:3faa5c39054b2f03ca547da9b2f52fde67c06240c31853f306aea97f13647b55", size = 331077, upload-time = "2025-08-27T12:15:46.575Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1e/e8/685f47e0d754320684db4425a0967f7d3fa70126bffd76110b7009a0090f/joblib-1.5.2-py3-none-any.whl", hash = "sha256:4e1f0bdbb987e6d843c70cf43714cb276623def372df3c22fe5266b2670bc241", size = 308396, upload-time = "2025-08-27T12:15:45.188Z" }, +] + [[package]] name = "json5" version = "0.12.0" @@ -2957,26 +3079,21 @@ wheels = [ [[package]] name = "langchain" -version = "0.3.27" +version = "1.0.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "async-timeout", marker = "python_full_version < '3.11'" }, { name = "langchain-core" }, - { name = "langchain-text-splitters" }, - { name = "langsmith" }, + { name = "langgraph" }, { name = "pydantic" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sqlalchemy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/83/f6/f4f7f3a56626fe07e2bb330feb61254dbdf06c506e6b59a536a337da51cf/langchain-0.3.27.tar.gz", hash = "sha256:aa6f1e6274ff055d0fd36254176770f356ed0a8994297d1df47df341953cec62", size = 10233809, upload-time = "2025-07-24T14:42:32.959Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/7a/63c041d1ee74c505e30ec882889117baa08afce2264bcb4463929bac4e94/langchain-1.0.7.tar.gz", hash = "sha256:e3f8ad742b4cdc91d728f96bd70e4688bc11ffeca3bd160c5fe9937625d541b9", size = 465198, upload-time = "2025-11-14T20:52:21.813Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/d5/4861816a95b2f6993f1360cfb605aacb015506ee2090433a71de9cca8477/langchain-0.3.27-py3-none-any.whl", hash = "sha256:7b20c4f338826acb148d885b20a73a16e410ede9ee4f19bb02011852d5f98798", size = 1018194, upload-time = "2025-07-24T14:42:30.23Z" }, + { url = "https://files.pythonhosted.org/packages/8e/4a/02c14af46fa79ce7b02a0f8af46f5905cc7e8b647a5f1a7c793c03ac5063/langchain-1.0.7-py3-none-any.whl", hash = "sha256:cf33b4d60d7a2ff7f0f313441628927853192cdbab9d6d8ce229909a868bbf12", size = 93738, upload-time = "2025-11-14T20:52:20.717Z" }, ] [[package]] name = "langchain-core" -version = "0.3.75" +version = "1.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -2987,14 +3104,14 @@ dependencies = [ { name = "tenacity" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/63/270b71a23e849984505ddc7c5c9fd3f4bd9cb14b1a484ee44c4e51c33cc2/langchain_core-0.3.75.tar.gz", hash = "sha256:ab0eb95a06ed6043f76162e6086b45037690cb70b7f090bd83b5ebb8a05b70ed", size = 570876, upload-time = "2025-08-26T15:24:12.246Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d9/61/c356e19525a210baf960968dbfb03ee38a05e05ddb41efeb32abfcb4e360/langchain_core-1.0.5.tar.gz", hash = "sha256:7ecbad9a60dde626252733a9c18c7377f4468cfe00465ffa99f5e9c6cb9b82d2", size = 778259, upload-time = "2025-11-14T16:59:27.277Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/42/0d0221cce6f168f644d7d96cb6c87c4e42fc55d2941da7a36e970e3ab8ab/langchain_core-0.3.75-py3-none-any.whl", hash = "sha256:03ca1fadf955ee3c7d5806a841f4b3a37b816acea5e61a7e6ba1298c05eea7f5", size = 443986, upload-time = "2025-08-26T15:24:10.883Z" }, + { url = "https://files.pythonhosted.org/packages/6e/ee/aaf2343a35080154c82ceb110e03dd00f15459bc72e518df51724cbc41a9/langchain_core-1.0.5-py3-none-any.whl", hash = "sha256:d24c0cf12cfcd96dd4bd479aa91425f3a6652226cd824228ae422a195067b74e", size = 471506, upload-time = "2025-11-14T16:59:25.629Z" }, ] [[package]] name = "langchain-fireworks" -version = "0.3.0" +version = "1.0.0" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -3003,35 +3120,23 @@ dependencies = [ { name = "openai" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1d/80/78ea4a04b1170cfa7564557808fd80e4c6f812cb5655c95a0374ca79c7ac/langchain_fireworks-0.3.0.tar.gz", hash = "sha256:09db8a06cd50df07068c07c4862e87d70b0da0f7d4e1b06f062c292af61c1433", size = 20900, upload-time = "2025-04-23T14:14:32.438Z" } +sdist = { url = "https://files.pythonhosted.org/packages/76/20/7ce23e7b0a72058a9f70612293aa13ff9fc5c4fe7f5cf97dd81314fe1c53/langchain_fireworks-1.0.0.tar.gz", hash = "sha256:cc8c812c0a1199bdeaabc9210c713e9186784d83fe3c6bc4d98516959f200e8e", size = 167346, upload-time = "2025-10-17T15:36:14.22Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/05/68/79696d5e1573a674141a44c9c59c04629e1ba25673d64a7b03f3843ae162/langchain_fireworks-0.3.0-py3-none-any.whl", hash = "sha256:ef2ea22f8cae3e654f0e1d3eb3a60c5fcd4a914643ab324507997f89f5831166", size = 17770, upload-time = "2025-04-23T14:14:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/b2/50/99ac5df1b99241e0f9df142b9a48053b840639e95e64a7497d2df06167b8/langchain_fireworks-1.0.0-py3-none-any.whl", hash = "sha256:4876086af5e4a606666339bbae6db89514804d928c91ddf059c52f37eafc1d6b", size = 16865, upload-time = "2025-10-17T15:36:13.118Z" }, ] [[package]] name = "langchain-openai" -version = "0.3.23" +version = "1.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "openai" }, { name = "tiktoken" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/74/f1/575120e829430f9bdcfc2c5c4121f04b1b5a143d96e572ff32399b787ef2/langchain_openai-0.3.23.tar.gz", hash = "sha256:73411c06e04bc145db7146a6fcf33dd0f1a85130499dcae988829a4441ddaa66", size = 647923, upload-time = "2025-06-13T14:24:31.388Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/73/6a96bc3a48825317886fa52a2a598286d35cf0384fce5dc3e5da7be06fd0/langchain_openai-1.0.3.tar.gz", hash = "sha256:e9df56540c1118002ab5306208c4845715e9209779c8a7ac9037eded98435fdc", size = 1032676, upload-time = "2025-11-15T00:29:03.774Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/71/65/88060305d5d627841bc8da7e9fb31fb603e5b103b4e5ec5b4d1a7edfbc3b/langchain_openai-0.3.23-py3-none-any.whl", hash = "sha256:624794394482c0923823f0aac44979968d77fdcfa810e42d4b0abd8096199a40", size = 65392, upload-time = "2025-06-13T14:24:30.263Z" }, -] - -[[package]] -name = "langchain-text-splitters" -version = "0.3.9" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "langchain-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/91/52/d43ad77acae169210cc476cbc1e4ab37a701017c950211a11ab500fe7d7e/langchain_text_splitters-0.3.9.tar.gz", hash = "sha256:7cd1e5a3aaf609979583eeca2eb34177622570b8fa8f586a605c6b1c34e7ebdb", size = 45260, upload-time = "2025-07-24T14:38:45.14Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/52/7638394b88bc15083fd2c3752a843784d9d2d110d68fed6437c8607fb749/langchain_text_splitters-0.3.9-py3-none-any.whl", hash = "sha256:cee0bb816211584ea79cc79927317c358543f40404bcfdd69e69ba3ccde54401", size = 33314, upload-time = "2025-07-24T14:38:43.953Z" }, + { url = "https://files.pythonhosted.org/packages/ff/de/0cb08f8732f070397233df7ad5ef461d83784ce567e7a57d5de5eb96851f/langchain_openai-1.0.3-py3-none-any.whl", hash = "sha256:18d254dbe946d9e9fe6d31416c60c8fc06513427f6e8d8c372e015345e1e17f6", size = 82536, 
upload-time = "2025-11-15T00:29:02.573Z" }, ] [[package]] @@ -3056,7 +3161,7 @@ wheels = [ [[package]] name = "langgraph" -version = "0.6.7" +version = "1.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, @@ -3066,9 +3171,9 @@ dependencies = [ { name = "pydantic" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/85/36feb25062da40ca395f6c44d0232a672842e5421885101f6faf4670b670/langgraph-0.6.7.tar.gz", hash = "sha256:ba7fd17b8220142d6a4269b6038f2b3dcbcef42cd5ecf4a4c8d9b60b010830a6", size = 465534, upload-time = "2025-09-07T16:49:42.895Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/55/70f2d11d33b0310d3e48d8e049825b4a34a1c822d48f6448ae548d2cd0f8/langgraph-1.0.3.tar.gz", hash = "sha256:873a6aae6be054ef52a05c463be363a46da9711405b1b14454d595f543b68335", size = 483302, upload-time = "2025-11-10T17:41:45.425Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/06/f440922a58204dbfd10f7fdda0de0325529a159e9dc3d1038afe4b431a49/langgraph-0.6.7-py3-none-any.whl", hash = "sha256:c724dd8c24806b70faf4903e8e20c0234f8c0a356e0e96a88035cbecca9df2cf", size = 153329, upload-time = "2025-09-07T16:49:40.45Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/fdf6ecd0e44cb02d20afe7d0fb64c748a749f4b2e011bf9a785a32642367/langgraph-1.0.3-py3-none-any.whl", hash = "sha256:4a75146f09bd0d127a724876f4244f460c4c66353a993641bd641ed710cd010f", size = 156845, upload-time = "2025-11-10T17:41:43.868Z" }, ] [[package]] @@ -3086,15 +3191,15 @@ wheels = [ [[package]] name = "langgraph-prebuilt" -version = "0.6.4" +version = "1.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "langgraph-checkpoint" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/21/9b198d11732101ee8cdf30af98d0b4f11254c768de15173e57f5260fd14b/langgraph_prebuilt-0.6.4.tar.gz", hash = "sha256:e9e53b906ee5df46541d1dc5303239e815d3ec551e52bb03dd6463acc79ec28f", size = 125695, upload-time = "2025-08-07T18:17:57.333Z" } +sdist = { url = "https://files.pythonhosted.org/packages/84/08/45857c7c65f696307834af13946a72293e6cc49141de887f0957c2eb2c46/langgraph_prebuilt-1.0.4.tar.gz", hash = "sha256:7b4f9e97a146d2d625695c3549bdb432974b80817165139ec2ec869721e72c0f", size = 142470, upload-time = "2025-11-13T19:02:14.807Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/7f/973b0d9729d9693d6e5b4bc5f3ae41138d194cb7b16b0ed230020beeb13a/langgraph_prebuilt-0.6.4-py3-none-any.whl", hash = "sha256:819f31d88b84cb2729ff1b79db2d51e9506b8fb7aaacfc0d359d4fe16e717344", size = 28025, upload-time = "2025-08-07T18:17:56.493Z" }, + { url = "https://files.pythonhosted.org/packages/69/14/a83e50129f66df783a68acb89e7b3e9c39b5c128a8748e961bc2b187f003/langgraph_prebuilt-1.0.4-py3-none-any.whl", hash = "sha256:50b1aa2b434783b6da30785568cf7155136b484750cc2ec695c0d4255db08262", size = 34414, upload-time = "2025-11-13T19:02:13.416Z" }, ] [[package]] @@ -3137,6 +3242,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2d/00/d90b10b962b4277f5e64a78b6609968859ff86889f5b898c1a778c06ec00/lark-1.2.2-py3-none-any.whl", hash = "sha256:c2276486b02f0f1b90be155f2c8ba4a8e194d42775786db622faccd652d8e80c", size = 111036, upload-time = "2024-08-13T19:48:58.603Z" }, ] +[[package]] +name = "libwebarena" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiolimiter" }, + { name = "beartype" }, + { name = "evaluate" }, + { name = "flask" }, + { 
name = "gymnasium" }, + { name = "nltk" }, + { name = "openai" }, + { name = "pillow" }, + { name = "playwright" }, + { name = "text-generation" }, + { name = "tiktoken" }, + { name = "transformers" }, + { name = "types-tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/48/b3734dbe7058a48bda2065f5af99ac3d4d2cb0938ea5a4501b08e98e9317/libwebarena-0.0.4.tar.gz", hash = "sha256:78f76a415402cebf13b269b74b0a22c1888ae5513a2bde4b4dab781cb9445afd", size = 107046, upload-time = "2024-11-14T20:21:13.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c3/86/495d0d95cd5a1a16496c70f71171ee9a62fb09f2e31ae02443db0bd6aa5f/libwebarena-0.0.4-py3-none-any.whl", hash = "sha256:9ebee3b4371502c4f0f7e727a72e5846235d6750d420db9a3b8a168107654feb", size = 116253, upload-time = "2024-11-14T20:21:11.242Z" }, +] + [[package]] name = "litellm" version = "1.74.9" @@ -3205,6 +3334,88 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595, upload-time = "2024-12-06T11:20:54.538Z" }, ] +[[package]] +name = "lxml" +version = "5.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479, upload-time = "2025-04-23T01:50:29.322Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/1f/a3b6b74a451ceb84b471caa75c934d2430a4d84395d38ef201d539f38cd1/lxml-5.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e7bc6df34d42322c5289e37e9971d6ed114e3776b45fa879f734bded9d1fea9c", size = 8076838, upload-time = "2025-04-23T01:44:29.325Z" }, + { url = "https://files.pythonhosted.org/packages/36/af/a567a55b3e47135b4d1f05a1118c24529104c003f95851374b3748139dc1/lxml-5.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6854f8bd8a1536f8a1d9a3655e6354faa6406621cf857dc27b681b69860645c7", size = 4381827, upload-time = "2025-04-23T01:44:33.345Z" }, + { url = "https://files.pythonhosted.org/packages/50/ba/4ee47d24c675932b3eb5b6de77d0f623c2db6dc466e7a1f199792c5e3e3a/lxml-5.4.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:696ea9e87442467819ac22394ca36cb3d01848dad1be6fac3fb612d3bd5a12cf", size = 5204098, upload-time = "2025-04-23T01:44:35.809Z" }, + { url = "https://files.pythonhosted.org/packages/f2/0f/b4db6dfebfefe3abafe360f42a3d471881687fd449a0b86b70f1f2683438/lxml-5.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef80aeac414f33c24b3815ecd560cee272786c3adfa5f31316d8b349bfade28", size = 4930261, upload-time = "2025-04-23T01:44:38.271Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1f/0bb1bae1ce056910f8db81c6aba80fec0e46c98d77c0f59298c70cd362a3/lxml-5.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b9c2754cef6963f3408ab381ea55f47dabc6f78f4b8ebb0f0b25cf1ac1f7609", size = 5529621, upload-time = "2025-04-23T01:44:40.921Z" }, + { url = "https://files.pythonhosted.org/packages/21/f5/e7b66a533fc4a1e7fa63dd22a1ab2ec4d10319b909211181e1ab3e539295/lxml-5.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a62cc23d754bb449d63ff35334acc9f5c02e6dae830d78dab4dd12b78a524f4", size = 4983231, 
upload-time = "2025-04-23T01:44:43.871Z" }, + { url = "https://files.pythonhosted.org/packages/11/39/a38244b669c2d95a6a101a84d3c85ba921fea827e9e5483e93168bf1ccb2/lxml-5.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f82125bc7203c5ae8633a7d5d20bcfdff0ba33e436e4ab0abc026a53a8960b7", size = 5084279, upload-time = "2025-04-23T01:44:46.632Z" }, + { url = "https://files.pythonhosted.org/packages/db/64/48cac242347a09a07740d6cee7b7fd4663d5c1abd65f2e3c60420e231b27/lxml-5.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b67319b4aef1a6c56576ff544b67a2a6fbd7eaee485b241cabf53115e8908b8f", size = 4927405, upload-time = "2025-04-23T01:44:49.843Z" }, + { url = "https://files.pythonhosted.org/packages/98/89/97442835fbb01d80b72374f9594fe44f01817d203fa056e9906128a5d896/lxml-5.4.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:a8ef956fce64c8551221f395ba21d0724fed6b9b6242ca4f2f7beb4ce2f41997", size = 5550169, upload-time = "2025-04-23T01:44:52.791Z" }, + { url = "https://files.pythonhosted.org/packages/f1/97/164ca398ee654eb21f29c6b582685c6c6b9d62d5213abc9b8380278e9c0a/lxml-5.4.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:0a01ce7d8479dce84fc03324e3b0c9c90b1ece9a9bb6a1b6c9025e7e4520e78c", size = 5062691, upload-time = "2025-04-23T01:44:56.108Z" }, + { url = "https://files.pythonhosted.org/packages/d0/bc/712b96823d7feb53482d2e4f59c090fb18ec7b0d0b476f353b3085893cda/lxml-5.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91505d3ddebf268bb1588eb0f63821f738d20e1e7f05d3c647a5ca900288760b", size = 5133503, upload-time = "2025-04-23T01:44:59.222Z" }, + { url = "https://files.pythonhosted.org/packages/d4/55/a62a39e8f9da2a8b6002603475e3c57c870cd9c95fd4b94d4d9ac9036055/lxml-5.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a3bcdde35d82ff385f4ede021df801b5c4a5bcdfb61ea87caabcebfc4945dc1b", size = 4999346, upload-time = "2025-04-23T01:45:02.088Z" }, + { url = "https://files.pythonhosted.org/packages/ea/47/a393728ae001b92bb1a9e095e570bf71ec7f7fbae7688a4792222e56e5b9/lxml-5.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aea7c06667b987787c7d1f5e1dfcd70419b711cdb47d6b4bb4ad4b76777a0563", size = 5627139, upload-time = "2025-04-23T01:45:04.582Z" }, + { url = "https://files.pythonhosted.org/packages/5e/5f/9dcaaad037c3e642a7ea64b479aa082968de46dd67a8293c541742b6c9db/lxml-5.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:a7fb111eef4d05909b82152721a59c1b14d0f365e2be4c742a473c5d7372f4f5", size = 5465609, upload-time = "2025-04-23T01:45:07.649Z" }, + { url = "https://files.pythonhosted.org/packages/a7/0a/ebcae89edf27e61c45023005171d0ba95cb414ee41c045ae4caf1b8487fd/lxml-5.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43d549b876ce64aa18b2328faff70f5877f8c6dede415f80a2f799d31644d776", size = 5192285, upload-time = "2025-04-23T01:45:10.456Z" }, + { url = "https://files.pythonhosted.org/packages/42/ad/cc8140ca99add7d85c92db8b2354638ed6d5cc0e917b21d36039cb15a238/lxml-5.4.0-cp310-cp310-win32.whl", hash = "sha256:75133890e40d229d6c5837b0312abbe5bac1c342452cf0e12523477cd3aa21e7", size = 3477507, upload-time = "2025-04-23T01:45:12.474Z" }, + { url = "https://files.pythonhosted.org/packages/e9/39/597ce090da1097d2aabd2f9ef42187a6c9c8546d67c419ce61b88b336c85/lxml-5.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:de5b4e1088523e2b6f730d0509a9a813355b7f5659d70eb4f319c76beea2e250", size = 3805104, upload-time = "2025-04-23T01:45:15.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/2d/67693cc8a605a12e5975380d7ff83020dcc759351b5a066e1cced04f797b/lxml-5.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:98a3912194c079ef37e716ed228ae0dcb960992100461b704aea4e93af6b0bb9", size = 8083240, upload-time = "2025-04-23T01:45:18.566Z" }, + { url = "https://files.pythonhosted.org/packages/73/53/b5a05ab300a808b72e848efd152fe9c022c0181b0a70b8bca1199f1bed26/lxml-5.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ea0252b51d296a75f6118ed0d8696888e7403408ad42345d7dfd0d1e93309a7", size = 4387685, upload-time = "2025-04-23T01:45:21.387Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cb/1a3879c5f512bdcd32995c301886fe082b2edd83c87d41b6d42d89b4ea4d/lxml-5.4.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b92b69441d1bd39f4940f9eadfa417a25862242ca2c396b406f9272ef09cdcaa", size = 4991164, upload-time = "2025-04-23T01:45:23.849Z" }, + { url = "https://files.pythonhosted.org/packages/f9/94/bbc66e42559f9d04857071e3b3d0c9abd88579367fd2588a4042f641f57e/lxml-5.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20e16c08254b9b6466526bc1828d9370ee6c0d60a4b64836bc3ac2917d1e16df", size = 4746206, upload-time = "2025-04-23T01:45:26.361Z" }, + { url = "https://files.pythonhosted.org/packages/66/95/34b0679bee435da2d7cae895731700e519a8dfcab499c21662ebe671603e/lxml-5.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7605c1c32c3d6e8c990dd28a0970a3cbbf1429d5b92279e37fda05fb0c92190e", size = 5342144, upload-time = "2025-04-23T01:45:28.939Z" }, + { url = "https://files.pythonhosted.org/packages/e0/5d/abfcc6ab2fa0be72b2ba938abdae1f7cad4c632f8d552683ea295d55adfb/lxml-5.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecf4c4b83f1ab3d5a7ace10bafcb6f11df6156857a3c418244cef41ca9fa3e44", size = 4825124, upload-time = "2025-04-23T01:45:31.361Z" }, + { url = "https://files.pythonhosted.org/packages/5a/78/6bd33186c8863b36e084f294fc0a5e5eefe77af95f0663ef33809cc1c8aa/lxml-5.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cef4feae82709eed352cd7e97ae062ef6ae9c7b5dbe3663f104cd2c0e8d94ba", size = 4876520, upload-time = "2025-04-23T01:45:34.191Z" }, + { url = "https://files.pythonhosted.org/packages/3b/74/4d7ad4839bd0fc64e3d12da74fc9a193febb0fae0ba6ebd5149d4c23176a/lxml-5.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:df53330a3bff250f10472ce96a9af28628ff1f4efc51ccba351a8820bca2a8ba", size = 4765016, upload-time = "2025-04-23T01:45:36.7Z" }, + { url = "https://files.pythonhosted.org/packages/24/0d/0a98ed1f2471911dadfc541003ac6dd6879fc87b15e1143743ca20f3e973/lxml-5.4.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:aefe1a7cb852fa61150fcb21a8c8fcea7b58c4cb11fbe59c97a0a4b31cae3c8c", size = 5362884, upload-time = "2025-04-23T01:45:39.291Z" }, + { url = "https://files.pythonhosted.org/packages/48/de/d4f7e4c39740a6610f0f6959052b547478107967362e8424e1163ec37ae8/lxml-5.4.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ef5a7178fcc73b7d8c07229e89f8eb45b2908a9238eb90dcfc46571ccf0383b8", size = 4902690, upload-time = "2025-04-23T01:45:42.386Z" }, + { url = "https://files.pythonhosted.org/packages/07/8c/61763abd242af84f355ca4ef1ee096d3c1b7514819564cce70fd18c22e9a/lxml-5.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d2ed1b3cb9ff1c10e6e8b00941bb2e5bb568b307bfc6b17dffbbe8be5eecba86", size = 4944418, upload-time = "2025-04-23T01:45:46.051Z" }, + { 
url = "https://files.pythonhosted.org/packages/f9/c5/6d7e3b63e7e282619193961a570c0a4c8a57fe820f07ca3fe2f6bd86608a/lxml-5.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:72ac9762a9f8ce74c9eed4a4e74306f2f18613a6b71fa065495a67ac227b3056", size = 4827092, upload-time = "2025-04-23T01:45:48.943Z" }, + { url = "https://files.pythonhosted.org/packages/71/4a/e60a306df54680b103348545706a98a7514a42c8b4fbfdcaa608567bb065/lxml-5.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f5cb182f6396706dc6cc1896dd02b1c889d644c081b0cdec38747573db88a7d7", size = 5418231, upload-time = "2025-04-23T01:45:51.481Z" }, + { url = "https://files.pythonhosted.org/packages/27/f2/9754aacd6016c930875854f08ac4b192a47fe19565f776a64004aa167521/lxml-5.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:3a3178b4873df8ef9457a4875703488eb1622632a9cee6d76464b60e90adbfcd", size = 5261798, upload-time = "2025-04-23T01:45:54.146Z" }, + { url = "https://files.pythonhosted.org/packages/38/a2/0c49ec6941428b1bd4f280650d7b11a0f91ace9db7de32eb7aa23bcb39ff/lxml-5.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e094ec83694b59d263802ed03a8384594fcce477ce484b0cbcd0008a211ca751", size = 4988195, upload-time = "2025-04-23T01:45:56.685Z" }, + { url = "https://files.pythonhosted.org/packages/7a/75/87a3963a08eafc46a86c1131c6e28a4de103ba30b5ae903114177352a3d7/lxml-5.4.0-cp311-cp311-win32.whl", hash = "sha256:4329422de653cdb2b72afa39b0aa04252fca9071550044904b2e7036d9d97fe4", size = 3474243, upload-time = "2025-04-23T01:45:58.863Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f9/1f0964c4f6c2be861c50db380c554fb8befbea98c6404744ce243a3c87ef/lxml-5.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd3be6481ef54b8cfd0e1e953323b7aa9d9789b94842d0e5b142ef4bb7999539", size = 3815197, upload-time = "2025-04-23T01:46:01.096Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392, upload-time = "2025-04-23T01:46:04.09Z" }, + { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103, upload-time = "2025-04-23T01:46:07.227Z" }, + { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224, upload-time = "2025-04-23T01:46:10.237Z" }, + { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913, upload-time = "2025-04-23T01:46:12.757Z" }, + { url = "https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441, upload-time = "2025-04-23T01:46:16.037Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165, upload-time = "2025-04-23T01:46:19.137Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580, upload-time = "2025-04-23T01:46:21.963Z" }, + { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493, upload-time = "2025-04-23T01:46:24.316Z" }, + { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679, upload-time = "2025-04-23T01:46:27.097Z" }, + { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691, upload-time = "2025-04-23T01:46:30.009Z" }, + { url = "https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075, upload-time = "2025-04-23T01:46:32.33Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680, upload-time = "2025-04-23T01:46:34.852Z" }, + { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253, upload-time = "2025-04-23T01:46:37.608Z" }, + { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651, upload-time = "2025-04-23T01:46:40.183Z" }, + { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315, upload-time = "2025-04-23T01:46:43.333Z" }, + { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149, upload-time = "2025-04-23T01:46:45.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095, upload-time = "2025-04-23T01:46:48.521Z" }, + { url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086, upload-time = "2025-04-23T01:46:52.218Z" }, + { url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613, upload-time = "2025-04-23T01:46:55.281Z" }, + { url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008, upload-time = "2025-04-23T01:46:57.817Z" }, + { url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915, upload-time = "2025-04-23T01:47:00.745Z" }, + { url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890, upload-time = "2025-04-23T01:47:04.702Z" }, + { url = "https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644, upload-time = "2025-04-23T01:47:07.833Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817, upload-time = "2025-04-23T01:47:10.317Z" }, + { url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916, upload-time = "2025-04-23T01:47:12.823Z" }, + { url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274, upload-time = "2025-04-23T01:47:15.916Z" }, + { url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757, upload-time = "2025-04-23T01:47:19.793Z" }, + { url = 
"https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028, upload-time = "2025-04-23T01:47:22.401Z" }, + { url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487, upload-time = "2025-04-23T01:47:25.513Z" }, + { url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688, upload-time = "2025-04-23T01:47:28.454Z" }, + { url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043, upload-time = "2025-04-23T01:47:31.208Z" }, + { url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569, upload-time = "2025-04-23T01:47:33.805Z" }, + { url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270, upload-time = "2025-04-23T01:47:36.133Z" }, + { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606, upload-time = "2025-04-23T01:47:39.028Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b0/e4d1cbb8c078bc4ae44de9c6a79fec4e2b4151b1b4d50af71d799e76b177/lxml-5.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1b717b00a71b901b4667226bba282dd462c42ccf618ade12f9ba3674e1fabc55", size = 3892319, upload-time = "2025-04-23T01:49:22.069Z" }, + { url = "https://files.pythonhosted.org/packages/5b/aa/e2bdefba40d815059bcb60b371a36fbfcce970a935370e1b367ba1cc8f74/lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27a9ded0f0b52098ff89dd4c418325b987feed2ea5cc86e8860b0f844285d740", size = 4211614, upload-time = "2025-04-23T01:49:24.599Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5f/91ff89d1e092e7cfdd8453a939436ac116db0a665e7f4be0cd8e65c7dc5a/lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b7ce10634113651d6f383aa712a194179dcd496bd8c41e191cec2099fa09de5", size = 4306273, upload-time = "2025-04-23T01:49:27.355Z" }, + { url = "https://files.pythonhosted.org/packages/be/7c/8c3f15df2ca534589717bfd19d1e3482167801caedfa4d90a575facf68a6/lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53370c26500d22b45182f98847243efb518d268374a9570409d2e2276232fd37", size = 4208552, upload-time = "2025-04-23T01:49:29.949Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/d8/9567afb1665f64d73fc54eb904e418d1138d7f011ed00647121b4dd60b38/lxml-5.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6364038c519dffdbe07e3cf42e6a7f8b90c275d4d1617a69bb59734c1a2d571", size = 4331091, upload-time = "2025-04-23T01:49:32.842Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ab/fdbbd91d8d82bf1a723ba88ec3e3d76c022b53c391b0c13cad441cdb8f9e/lxml-5.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b12cb6527599808ada9eb2cd6e0e7d3d8f13fe7bbb01c6311255a15ded4c7ab4", size = 3487862, upload-time = "2025-04-23T01:49:36.296Z" }, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -3321,6 +3532,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/3f/d085c7f49ade6d273b185d61ec9405e672b6433f710ea64a90135a8dd445/mcp-1.13.1-py3-none-any.whl", hash = "sha256:c314e7c8bd477a23ba3ef472ee5a32880316c42d03e06dcfa31a1cc7a73b65df", size = 161494, upload-time = "2025-08-22T09:22:14.705Z" }, ] +[package.optional-dependencies] +cli = [ + { name = "python-dotenv" }, + { name = "typer" }, +] + [[package]] name = "mdurl" version = "0.1.2" @@ -3714,6 +3931,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/76/3165e84e5266d146d967a6cc784ff2fbf6ddd00985a55ec006b72bc39d5d/nh3-0.3.0-cp38-abi3-win_arm64.whl", hash = "sha256:d97d3efd61404af7e5721a0e74d81cdbfc6e5f97e11e731bb6d090e30a7b62b2", size = 585971, upload-time = "2025-07-17T14:43:35.936Z" }, ] +[[package]] +name = "nltk" +version = "3.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "joblib" }, + { name = "regex" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, upload-time = "2025-10-01T07:19:23.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -4064,7 +4296,7 @@ wheels = [ [[package]] name = "openai" -version = "1.107.0" +version = "2.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -4076,9 +4308,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/88/67/d6498de300f83ff57a79cb7aa96ef3bef8d6f070c3ded0f1b5b45442a6bc/openai-1.107.0.tar.gz", hash = "sha256:43e04927584e57d0e9e640ee0077c78baf8150098be96ebd5c512539b6c4e9a4", size = 566056, upload-time = "2025-09-08T19:25:47.604Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/e4/42591e356f1d53c568418dc7e30dcda7be31dd5a4d570bca22acb0525862/openai-2.8.1.tar.gz", hash = "sha256:cb1b79eef6e809f6da326a7ef6038719e35aa944c42d081807bfa1be8060f15f", size = 602490, upload-time = "2025-11-17T22:39:59.549Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/ed/e8a4fd20390f2858b95227c288df8fe0c835f7c77625f7583609161684ba/openai-1.107.0-py3-none-any.whl", hash = "sha256:3dcfa3cbb116bd6924b27913b8da28c4a787379ff60049588547a1013e6d6438", size = 950968, upload-time = "2025-09-08T19:25:45.552Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/4f/dbc0c124c40cb390508a82770fb9f6e3ed162560181a85089191a851c59a/openai-2.8.1-py3-none-any.whl", hash = "sha256:c6c3b5a04994734386e8dad3c00a393f56d3b68a27cd2e8acae91a59e4122463", size = 1022688, upload-time = "2025-11-17T22:39:57.675Z" }, ] [[package]] @@ -4093,6 +4325,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, ] +[[package]] +name = "openenv" +version = "0.1.1" +source = { git = "https://github.com/meta-pytorch/OpenEnv.git#8db06338b98726aeb382b1da5143e5ed1cd2c839" } +dependencies = [ + { name = "fastapi" }, + { name = "huggingface-hub" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "rich" }, + { name = "tomli" }, + { name = "tomli-w" }, + { name = "typer" }, + { name = "uvicorn" }, +] + +[[package]] +name = "openenv-browsergym-env" +version = "0.1.0" +source = { git = "https://github.com/meta-pytorch/OpenEnv.git?subdirectory=src%2Fenvs%2Fbrowsergym_env#8db06338b98726aeb382b1da5143e5ed1cd2c839" } +dependencies = [ + { name = "browsergym-core" }, + { name = "browsergym-miniwob" }, + { name = "browsergym-webarena" }, + { name = "fastapi" }, + { name = "gymnasium" }, + { name = "openenv-core" }, + { name = "pillow" }, + { name = "playwright" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "uvicorn" }, +] + +[[package]] +name = "openenv-core" +version = "0.1.0" +source = { git = "https://github.com/meta-pytorch/OpenEnv.git?subdirectory=src%2Fcore#8db06338b98726aeb382b1da5143e5ed1cd2c839" } +dependencies = [ + { name = "fastapi" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "uvicorn", extra = ["standard"] }, +] + [[package]] name = "openevals" version = "0.1.0" @@ -4633,6 +4912,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, ] +[[package]] +name = "playwright" +version = "1.44.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet" }, + { name = "pyee" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/4e/e4b176c545ef25829ac66148f659a7e4e49bdc1d2e5fb4083ada772fe4d6/playwright-1.44.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:c2317a80896796fdeb03d60f06cc229e775ff2e19b80c64b1bb9b29c8a59d992", size = 34700291, upload-time = "2024-05-17T15:06:03.789Z" }, + { url = "https://files.pythonhosted.org/packages/81/7b/7dfbcdc44d08d0257e1e614d17a942b6915021bc1f72a671d51b4c8b8084/playwright-1.44.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:54d44fb634d870839301c2326e1e12a178a1be0de76d0caaec230ab075c2e077", size = 33028298, upload-time = "2024-05-17T15:06:14.615Z" }, + { url = "https://files.pythonhosted.org/packages/16/2c/87b865dafed50d9111640d8134ac1c3264782ec7a12f026ec29c27d01a26/playwright-1.44.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:64b67194e73b47ae72acf25f1a9cfacfef38ca2b52e4bb8b0abd385c5deeaadf", size = 34700291, upload-time = "2024-05-17T15:06:19.117Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/8e/2d6e29fe0d5f6d1ca55de8a6a737b6086b1f59f4b7519d3abb4944f67008/playwright-1.44.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:29161b1fae71f7c402df5b15f0bd3deaeecd8b3d1ecd9ff01271700c66210e7b", size = 37826267, upload-time = "2024-05-17T15:06:23.796Z" }, + { url = "https://files.pythonhosted.org/packages/d0/c0/19655639651722867cb78b21c5617545e14f2f5133097b7e3595d56aa379/playwright-1.44.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8c8a3bfea17576d3f94a2363eee195cbda8dbba86975588c7eaac7792b25eee", size = 37584496, upload-time = "2024-05-17T15:06:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/07/5c/4e3c1c9bafe11aca80446fedde2e563c6ecd6690c46d6def0d3fae5ac7e2/playwright-1.44.0-py3-none-win32.whl", hash = "sha256:235e37832deaa9af8a629d09955396259ab757533cc1922f9b0308b4ee0d9cdf", size = 29666760, upload-time = "2024-05-17T15:06:34.542Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f9/724595acdb66622ced0919e0e714a8ec3228d9b38827f61721126645912f/playwright-1.44.0-py3-none-win_amd64.whl", hash = "sha256:5b8a4a1d4d50f4ff99b47965576322a8c4e34631854b862a25c1feb824be22a8", size = 29666766, upload-time = "2024-05-17T15:06:38.406Z" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -5280,6 +5577,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" }, ] +[[package]] +name = "pyee" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f7/22/b4c7f3d9579204a014c4eda0e019e6bfe56af52a96cacc82004b60eec079/pyee-11.1.0.tar.gz", hash = "sha256:b53af98f6990c810edd9b56b87791021a8f54fd13db4edd1142438d44ba2263f", size = 29806, upload-time = "2023-11-23T17:13:25.913Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/cc/5cea8a0a0d3deb90b5a0d39ad1a6a1ccaa40a9ea86d793eb8a49d32a6ed0/pyee-11.1.0-py3-none-any.whl", hash = "sha256:5d346a7d0f861a4b2e6c47960295bd895f816725b27d656181947346be98d7c1", size = 15263, upload-time = "2023-11-23T17:13:24.486Z" }, +] + [[package]] name = "pygame" version = "2.6.1" @@ -5347,6 +5656,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0f/e8/11644fe823e05c583b330e9fb81e3e8fc5d079036512a8300fc157be349d/pykakasi-2.3.0-py3-none-any.whl", hash = "sha256:26d21b090048ff45c6a4d8e962426b7951767216008ec30358e8a9d74af77f29", size = 2395003, upload-time = "2024-06-24T04:57:18.101Z" }, ] +[[package]] +name = "pyparsing" +version = "3.2.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a5/181488fc2b9d093e3972d2a472855aae8a03f000592dbfce716a512b3359/pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6", size = 1099274, upload-time = "2025-09-21T04:11:06.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/5e/1aa9a93198c6b64513c9d7752de7422c06402de6600a8767da1524f9570b/pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e", size = 113890, upload-time = "2025-09-21T04:11:04.117Z" }, +] + [[package]] name = "pyperclip" version = "1.9.0" @@ -6301,51 +6619,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" }, ] -[[package]] -name = "sqlalchemy" -version = "2.0.41" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/12/d7c445b1940276a828efce7331cb0cb09d6e5f049651db22f4ebb0922b77/sqlalchemy-2.0.41-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b1f09b6821406ea1f94053f346f28f8215e293344209129a9c0fcc3578598d7b", size = 2117967, upload-time = "2025-05-14T17:48:15.841Z" }, - { url = "https://files.pythonhosted.org/packages/6f/b8/cb90f23157e28946b27eb01ef401af80a1fab7553762e87df51507eaed61/sqlalchemy-2.0.41-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1936af879e3db023601196a1684d28e12f19ccf93af01bf3280a3262c4b6b4e5", size = 2107583, upload-time = "2025-05-14T17:48:18.688Z" }, - { url = "https://files.pythonhosted.org/packages/9e/c2/eef84283a1c8164a207d898e063edf193d36a24fb6a5bb3ce0634b92a1e8/sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2ac41acfc8d965fb0c464eb8f44995770239668956dc4cdf502d1b1ffe0d747", size = 3186025, upload-time = "2025-05-14T17:51:51.226Z" }, - { url = "https://files.pythonhosted.org/packages/bd/72/49d52bd3c5e63a1d458fd6d289a1523a8015adedbddf2c07408ff556e772/sqlalchemy-2.0.41-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c24e0c0fde47a9723c81d5806569cddef103aebbf79dbc9fcbb617153dea30", size = 3186259, upload-time = "2025-05-14T17:55:22.526Z" }, - { url = "https://files.pythonhosted.org/packages/4f/9e/e3ffc37d29a3679a50b6bbbba94b115f90e565a2b4545abb17924b94c52d/sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23a8825495d8b195c4aa9ff1c430c28f2c821e8c5e2d98089228af887e5d7e29", size = 3126803, upload-time = "2025-05-14T17:51:53.277Z" }, - { url = "https://files.pythonhosted.org/packages/8a/76/56b21e363f6039978ae0b72690237b38383e4657281285a09456f313dd77/sqlalchemy-2.0.41-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:60c578c45c949f909a4026b7807044e7e564adf793537fc762b2489d522f3d11", size = 3148566, upload-time = "2025-05-14T17:55:24.398Z" }, - { url = "https://files.pythonhosted.org/packages/3b/92/11b8e1b69bf191bc69e300a99badbbb5f2f1102f2b08b39d9eee2e21f565/sqlalchemy-2.0.41-cp310-cp310-win32.whl", hash = "sha256:118c16cd3f1b00c76d69343e38602006c9cfb9998fa4f798606d28d63f23beda", size = 2086696, upload-time = "2025-05-14T17:55:59.136Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/88/2d706c9cc4502654860f4576cd54f7db70487b66c3b619ba98e0be1a4642/sqlalchemy-2.0.41-cp310-cp310-win_amd64.whl", hash = "sha256:7492967c3386df69f80cf67efd665c0f667cee67032090fe01d7d74b0e19bb08", size = 2110200, upload-time = "2025-05-14T17:56:00.757Z" }, - { url = "https://files.pythonhosted.org/packages/37/4e/b00e3ffae32b74b5180e15d2ab4040531ee1bef4c19755fe7926622dc958/sqlalchemy-2.0.41-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6375cd674fe82d7aa9816d1cb96ec592bac1726c11e0cafbf40eeee9a4516b5f", size = 2121232, upload-time = "2025-05-14T17:48:20.444Z" }, - { url = "https://files.pythonhosted.org/packages/ef/30/6547ebb10875302074a37e1970a5dce7985240665778cfdee2323709f749/sqlalchemy-2.0.41-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f8c9fdd15a55d9465e590a402f42082705d66b05afc3ffd2d2eb3c6ba919560", size = 2110897, upload-time = "2025-05-14T17:48:21.634Z" }, - { url = "https://files.pythonhosted.org/packages/9e/21/59df2b41b0f6c62da55cd64798232d7349a9378befa7f1bb18cf1dfd510a/sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32f9dc8c44acdee06c8fc6440db9eae8b4af8b01e4b1aee7bdd7241c22edff4f", size = 3273313, upload-time = "2025-05-14T17:51:56.205Z" }, - { url = "https://files.pythonhosted.org/packages/62/e4/b9a7a0e5c6f79d49bcd6efb6e90d7536dc604dab64582a9dec220dab54b6/sqlalchemy-2.0.41-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c11ceb9a1f482c752a71f203a81858625d8df5746d787a4786bca4ffdf71c6", size = 3273807, upload-time = "2025-05-14T17:55:26.928Z" }, - { url = "https://files.pythonhosted.org/packages/39/d8/79f2427251b44ddee18676c04eab038d043cff0e764d2d8bb08261d6135d/sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:911cc493ebd60de5f285bcae0491a60b4f2a9f0f5c270edd1c4dbaef7a38fc04", size = 3209632, upload-time = "2025-05-14T17:51:59.384Z" }, - { url = "https://files.pythonhosted.org/packages/d4/16/730a82dda30765f63e0454918c982fb7193f6b398b31d63c7c3bd3652ae5/sqlalchemy-2.0.41-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03968a349db483936c249f4d9cd14ff2c296adfa1290b660ba6516f973139582", size = 3233642, upload-time = "2025-05-14T17:55:29.901Z" }, - { url = "https://files.pythonhosted.org/packages/04/61/c0d4607f7799efa8b8ea3c49b4621e861c8f5c41fd4b5b636c534fcb7d73/sqlalchemy-2.0.41-cp311-cp311-win32.whl", hash = "sha256:293cd444d82b18da48c9f71cd7005844dbbd06ca19be1ccf6779154439eec0b8", size = 2086475, upload-time = "2025-05-14T17:56:02.095Z" }, - { url = "https://files.pythonhosted.org/packages/9d/8e/8344f8ae1cb6a479d0741c02cd4f666925b2bf02e2468ddaf5ce44111f30/sqlalchemy-2.0.41-cp311-cp311-win_amd64.whl", hash = "sha256:3d3549fc3e40667ec7199033a4e40a2f669898a00a7b18a931d3efb4c7900504", size = 2110903, upload-time = "2025-05-14T17:56:03.499Z" }, - { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645, upload-time = "2025-05-14T17:55:24.854Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399, upload-time = "2025-05-14T17:55:28.097Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269, upload-time = "2025-05-14T17:50:38.227Z" }, - { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364, upload-time = "2025-05-14T17:51:49.829Z" }, - { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072, upload-time = "2025-05-14T17:50:39.774Z" }, - { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074, upload-time = "2025-05-14T17:51:51.736Z" }, - { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514, upload-time = "2025-05-14T17:55:49.915Z" }, - { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557, upload-time = "2025-05-14T17:55:51.349Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" }, - { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" }, - { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" }, - { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" }, - { url = "https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420, upload-time = "2025-05-14T17:55:52.69Z" }, - { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329, upload-time = "2025-05-14T17:55:54.495Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, -] - [[package]] name = "sse-starlette" version = "2.4.1" @@ -6550,6 +6823,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154, upload-time = "2024-03-12T14:34:36.569Z" }, ] +[[package]] +name = "text-generation" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "huggingface-hub" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/53/1b2dc20686079464ae381f230a9fc412984a4255cea73c21afb6a46bc21f/text_generation-0.7.0.tar.gz", hash = "sha256:689200cd1f0d4141562af2515393c2c21cdbd9fac21c8398bf3043cdcc14184e", size = 10373, upload-time = "2024-03-22T16:09:22.167Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/79/8fc351fd919a41287243c998a47692c7eb0fa5acded13db0080f2c6f1852/text_generation-0.7.0-py3-none-any.whl", hash = "sha256:02ab337a0ee0e7c70e04a607b311c261caae74bde46a7d837c6fdd150108f4d8", size = 12718, upload-time = "2024-03-22T16:09:20.874Z" }, +] + [[package]] name = "text-unidecode" version = "1.3" @@ -6643,41 +6930,60 @@ wheels = [ [[package]] name = "tomli" -version = "2.2.1" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/ed/3f73f72945444548f33eba9a87fc7a6e969915e7b1acc8260b30e1f76a2f/tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549", size = 17392, upload-time = "2025-10-08T22:01:47.119Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/2e/299f62b401438d5fe1624119c723f5d877acc86a4c2492da405626665f12/tomli-2.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:88bd15eb972f3664f5ed4b57c1634a97153b4bac4479dcb6a495f41921eb7f45", size = 153236, upload-time = "2025-10-08T22:01:00.137Z" }, + { url = "https://files.pythonhosted.org/packages/86/7f/d8fffe6a7aefdb61bced88fcb5e280cfd71e08939da5894161bd71bea022/tomli-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:883b1c0d6398a6a9d29b508c331fa56adbcdff647f6ace4dfca0f50e90dfd0ba", size = 148084, upload-time = "2025-10-08T22:01:01.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/5c/24935fb6a2ee63e86d80e4d3b58b222dafaf438c416752c8b58537c8b89a/tomli-2.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d1381caf13ab9f300e30dd8feadb3de072aeb86f1d34a8569453ff32a7dea4bf", size = 234832, upload-time = "2025-10-08T22:01:02.543Z" }, + { url = "https://files.pythonhosted.org/packages/89/da/75dfd804fc11e6612846758a23f13271b76d577e299592b4371a4ca4cd09/tomli-2.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0e285d2649b78c0d9027570d4da3425bdb49830a6156121360b3f8511ea3441", size = 242052, upload-time = "2025-10-08T22:01:03.836Z" }, + { url = "https://files.pythonhosted.org/packages/70/8c/f48ac899f7b3ca7eb13af73bacbc93aec37f9c954df3c08ad96991c8c373/tomli-2.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0a154a9ae14bfcf5d8917a59b51ffd5a3ac1fd149b71b47a3a104ca4edcfa845", size = 239555, upload-time = "2025-10-08T22:01:04.834Z" }, + { url = "https://files.pythonhosted.org/packages/ba/28/72f8afd73f1d0e7829bfc093f4cb98ce0a40ffc0cc997009ee1ed94ba705/tomli-2.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:74bf8464ff93e413514fefd2be591c3b0b23231a77f901db1eb30d6f712fc42c", size = 245128, upload-time = "2025-10-08T22:01:05.84Z" }, + { url = "https://files.pythonhosted.org/packages/b6/eb/a7679c8ac85208706d27436e8d421dfa39d4c914dcf5fa8083a9305f58d9/tomli-2.3.0-cp311-cp311-win32.whl", hash = "sha256:00b5f5d95bbfc7d12f91ad8c593a1659b6387b43f054104cda404be6bda62456", size = 96445, upload-time = "2025-10-08T22:01:06.896Z" }, + { url = "https://files.pythonhosted.org/packages/0a/fe/3d3420c4cb1ad9cb462fb52967080575f15898da97e21cb6f1361d505383/tomli-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:4dc4ce8483a5d429ab602f111a93a6ab1ed425eae3122032db7e9acf449451be", size = 107165, upload-time = "2025-10-08T22:01:08.107Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b7/40f36368fcabc518bb11c8f06379a0fd631985046c038aca08c6d6a43c6e/tomli-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d7d86942e56ded512a594786a5ba0a5e521d02529b3826e7761a05138341a2ac", size = 154891, upload-time = "2025-10-08T22:01:09.082Z" }, + { url = "https://files.pythonhosted.org/packages/f9/3f/d9dd692199e3b3aab2e4e4dd948abd0f790d9ded8cd10cbaae276a898434/tomli-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ee0b47d4dad1c5e996e3cd33b8a76a50167ae5f96a2607cbe8cc773506ab22", size = 148796, upload-time = "2025-10-08T22:01:10.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/83/59bff4996c2cf9f9387a0f5a3394629c7efa5ef16142076a23a90f1955fa/tomli-2.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:792262b94d5d0a466afb5bc63c7daa9d75520110971ee269152083270998316f", size = 242121, upload-time = "2025-10-08T22:01:11.332Z" }, + { url = "https://files.pythonhosted.org/packages/45/e5/7c5119ff39de8693d6baab6c0b6dcb556d192c165596e9fc231ea1052041/tomli-2.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f195fe57ecceac95a66a75ac24d9d5fbc98ef0962e09b2eddec5d39375aae52", size = 250070, upload-time = "2025-10-08T22:01:12.498Z" }, + { url = "https://files.pythonhosted.org/packages/45/12/ad5126d3a278f27e6701abde51d342aa78d06e27ce2bb596a01f7709a5a2/tomli-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e31d432427dcbf4d86958c184b9bfd1e96b5b71f8eb17e6d02531f434fd335b8", size = 245859, upload-time = "2025-10-08T22:01:13.551Z" }, + 
{ url = "https://files.pythonhosted.org/packages/fb/a1/4d6865da6a71c603cfe6ad0e6556c73c76548557a8d658f9e3b142df245f/tomli-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b0882799624980785240ab732537fcfc372601015c00f7fc367c55308c186f6", size = 250296, upload-time = "2025-10-08T22:01:14.614Z" }, + { url = "https://files.pythonhosted.org/packages/a0/b7/a7a7042715d55c9ba6e8b196d65d2cb662578b4d8cd17d882d45322b0d78/tomli-2.3.0-cp312-cp312-win32.whl", hash = "sha256:ff72b71b5d10d22ecb084d345fc26f42b5143c5533db5e2eaba7d2d335358876", size = 97124, upload-time = "2025-10-08T22:01:15.629Z" }, + { url = "https://files.pythonhosted.org/packages/06/1e/f22f100db15a68b520664eb3328fb0ae4e90530887928558112c8d1f4515/tomli-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:1cb4ed918939151a03f33d4242ccd0aa5f11b3547d0cf30f7c74a408a5b99878", size = 107698, upload-time = "2025-10-08T22:01:16.51Z" }, + { url = "https://files.pythonhosted.org/packages/89/48/06ee6eabe4fdd9ecd48bf488f4ac783844fd777f547b8d1b61c11939974e/tomli-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5192f562738228945d7b13d4930baffda67b69425a7f0da96d360b0a3888136b", size = 154819, upload-time = "2025-10-08T22:01:17.964Z" }, + { url = "https://files.pythonhosted.org/packages/f1/01/88793757d54d8937015c75dcdfb673c65471945f6be98e6a0410fba167ed/tomli-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:be71c93a63d738597996be9528f4abe628d1adf5e6eb11607bc8fe1a510b5dae", size = 148766, upload-time = "2025-10-08T22:01:18.959Z" }, + { url = "https://files.pythonhosted.org/packages/42/17/5e2c956f0144b812e7e107f94f1cc54af734eb17b5191c0bbfb72de5e93e/tomli-2.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4665508bcbac83a31ff8ab08f424b665200c0e1e645d2bd9ab3d3e557b6185b", size = 240771, upload-time = "2025-10-08T22:01:20.106Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f4/0fbd014909748706c01d16824eadb0307115f9562a15cbb012cd9b3512c5/tomli-2.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4021923f97266babc6ccab9f5068642a0095faa0a51a246a6a02fccbb3514eaf", size = 248586, upload-time = "2025-10-08T22:01:21.164Z" }, + { url = "https://files.pythonhosted.org/packages/30/77/fed85e114bde5e81ecf9bc5da0cc69f2914b38f4708c80ae67d0c10180c5/tomli-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4ea38c40145a357d513bffad0ed869f13c1773716cf71ccaa83b0fa0cc4e42f", size = 244792, upload-time = "2025-10-08T22:01:22.417Z" }, + { url = "https://files.pythonhosted.org/packages/55/92/afed3d497f7c186dc71e6ee6d4fcb0acfa5f7d0a1a2878f8beae379ae0cc/tomli-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad805ea85eda330dbad64c7ea7a4556259665bdf9d2672f5dccc740eb9d3ca05", size = 248909, upload-time = "2025-10-08T22:01:23.859Z" }, + { url = "https://files.pythonhosted.org/packages/f8/84/ef50c51b5a9472e7265ce1ffc7f24cd4023d289e109f669bdb1553f6a7c2/tomli-2.3.0-cp313-cp313-win32.whl", hash = "sha256:97d5eec30149fd3294270e889b4234023f2c69747e555a27bd708828353ab606", size = 96946, upload-time = "2025-10-08T22:01:24.893Z" }, + { url = "https://files.pythonhosted.org/packages/b2/b7/718cd1da0884f281f95ccfa3a6cc572d30053cba64603f79d431d3c9b61b/tomli-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0c95ca56fbe89e065c6ead5b593ee64b84a26fca063b5d71a1122bf26e533999", size = 107705, upload-time = "2025-10-08T22:01:26.153Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/94/aeafa14a52e16163008060506fcb6aa1949d13548d13752171a755c65611/tomli-2.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cebc6fe843e0733ee827a282aca4999b596241195f43b4cc371d64fc6639da9e", size = 154244, upload-time = "2025-10-08T22:01:27.06Z" }, + { url = "https://files.pythonhosted.org/packages/db/e4/1e58409aa78eefa47ccd19779fc6f36787edbe7d4cd330eeeedb33a4515b/tomli-2.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4c2ef0244c75aba9355561272009d934953817c49f47d768070c3c94355c2aa3", size = 148637, upload-time = "2025-10-08T22:01:28.059Z" }, + { url = "https://files.pythonhosted.org/packages/26/b6/d1eccb62f665e44359226811064596dd6a366ea1f985839c566cd61525ae/tomli-2.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c22a8bf253bacc0cf11f35ad9808b6cb75ada2631c2d97c971122583b129afbc", size = 241925, upload-time = "2025-10-08T22:01:29.066Z" }, + { url = "https://files.pythonhosted.org/packages/70/91/7cdab9a03e6d3d2bb11beae108da5bdc1c34bdeb06e21163482544ddcc90/tomli-2.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0eea8cc5c5e9f89c9b90c4896a8deefc74f518db5927d0e0e8d4a80953d774d0", size = 249045, upload-time = "2025-10-08T22:01:31.98Z" }, + { url = "https://files.pythonhosted.org/packages/15/1b/8c26874ed1f6e4f1fcfeb868db8a794cbe9f227299402db58cfcc858766c/tomli-2.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b74a0e59ec5d15127acdabd75ea17726ac4c5178ae51b85bfe39c4f8a278e879", size = 245835, upload-time = "2025-10-08T22:01:32.989Z" }, + { url = "https://files.pythonhosted.org/packages/fd/42/8e3c6a9a4b1a1360c1a2a39f0b972cef2cc9ebd56025168c4137192a9321/tomli-2.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b5870b50c9db823c595983571d1296a6ff3e1b88f734a4c8f6fc6188397de005", size = 253109, upload-time = "2025-10-08T22:01:34.052Z" }, + { url = "https://files.pythonhosted.org/packages/22/0c/b4da635000a71b5f80130937eeac12e686eefb376b8dee113b4a582bba42/tomli-2.3.0-cp314-cp314-win32.whl", hash = "sha256:feb0dacc61170ed7ab602d3d972a58f14ee3ee60494292d384649a3dc38ef463", size = 97930, upload-time = "2025-10-08T22:01:35.082Z" }, + { url = "https://files.pythonhosted.org/packages/b9/74/cb1abc870a418ae99cd5c9547d6bce30701a954e0e721821df483ef7223c/tomli-2.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:b273fcbd7fc64dc3600c098e39136522650c49bca95df2d11cf3b626422392c8", size = 107964, upload-time = "2025-10-08T22:01:36.057Z" }, + { url = "https://files.pythonhosted.org/packages/54/78/5c46fff6432a712af9f792944f4fcd7067d8823157949f4e40c56b8b3c83/tomli-2.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:940d56ee0410fa17ee1f12b817b37a4d4e4dc4d27340863cc67236c74f582e77", size = 163065, upload-time = "2025-10-08T22:01:37.27Z" }, + { url = "https://files.pythonhosted.org/packages/39/67/f85d9bd23182f45eca8939cd2bc7050e1f90c41f4a2ecbbd5963a1d1c486/tomli-2.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f85209946d1fe94416debbb88d00eb92ce9cd5266775424ff81bc959e001acaf", size = 159088, upload-time = "2025-10-08T22:01:38.235Z" }, + { url = "https://files.pythonhosted.org/packages/26/5a/4b546a0405b9cc0659b399f12b6adb750757baf04250b148d3c5059fc4eb/tomli-2.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a56212bdcce682e56b0aaf79e869ba5d15a6163f88d5451cbde388d48b13f530", size = 268193, upload-time = "2025-10-08T22:01:39.712Z" }, + { url = 
"https://files.pythonhosted.org/packages/42/4f/2c12a72ae22cf7b59a7fe75b3465b7aba40ea9145d026ba41cb382075b0e/tomli-2.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c5f3ffd1e098dfc032d4d3af5c0ac64f6d286d98bc148698356847b80fa4de1b", size = 275488, upload-time = "2025-10-08T22:01:40.773Z" }, + { url = "https://files.pythonhosted.org/packages/92/04/a038d65dbe160c3aa5a624e93ad98111090f6804027d474ba9c37c8ae186/tomli-2.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e01decd096b1530d97d5d85cb4dff4af2d8347bd35686654a004f8dea20fc67", size = 272669, upload-time = "2025-10-08T22:01:41.824Z" }, + { url = "https://files.pythonhosted.org/packages/be/2f/8b7c60a9d1612a7cbc39ffcca4f21a73bf368a80fc25bccf8253e2563267/tomli-2.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8a35dd0e643bb2610f156cca8db95d213a90015c11fee76c946aa62b7ae7e02f", size = 279709, upload-time = "2025-10-08T22:01:43.177Z" }, + { url = "https://files.pythonhosted.org/packages/7e/46/cc36c679f09f27ded940281c38607716c86cf8ba4a518d524e349c8b4874/tomli-2.3.0-cp314-cp314t-win32.whl", hash = "sha256:a1f7f282fe248311650081faafa5f4732bdbfef5d45fe3f2e702fbc6f2d496e0", size = 107563, upload-time = "2025-10-08T22:01:44.233Z" }, + { url = "https://files.pythonhosted.org/packages/84/ff/426ca8683cf7b753614480484f6437f568fd2fda2edbdf57a2d3d8b27a0b/tomli-2.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:70a251f8d4ba2d9ac2542eecf008b3c8a9fc5c3f9f02c56a9d7952612be2fdba", size = 119756, upload-time = "2025-10-08T22:01:45.234Z" }, + { url = "https://files.pythonhosted.org/packages/77/b8/0135fadc89e73be292b473cb820b4f5a08197779206b33191e801feeae40/tomli-2.3.0-py3-none-any.whl", hash = "sha256:e95b1af3c5b07d9e643909b5abbec77cd9f1217e6d0bca72b0234736b9fb1f1b", size = 14408, upload-time = "2025-10-08T22:01:46.04Z" }, +] + +[[package]] +name = "tomli-w" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175, upload-time = "2024-11-27T22:38:36.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077, upload-time = "2024-11-27T22:37:54.956Z" }, - { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429, upload-time = "2024-11-27T22:37:56.698Z" }, - { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067, upload-time = "2024-11-27T22:37:57.63Z" }, - { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030, upload-time = "2024-11-27T22:37:59.344Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898, upload-time = "2024-11-27T22:38:00.429Z" }, - { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894, upload-time = "2024-11-27T22:38:02.094Z" }, - { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319, upload-time = "2024-11-27T22:38:03.206Z" }, - { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273, upload-time = "2024-11-27T22:38:04.217Z" }, - { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310, upload-time = "2024-11-27T22:38:05.908Z" }, - { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309, upload-time = "2024-11-27T22:38:06.812Z" }, - { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762, upload-time = "2024-11-27T22:38:07.731Z" }, - { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453, upload-time = "2024-11-27T22:38:09.384Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486, upload-time = "2024-11-27T22:38:10.329Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349, upload-time = "2024-11-27T22:38:11.443Z" }, - { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159, upload-time = "2024-11-27T22:38:13.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243, upload-time = "2024-11-27T22:38:14.766Z" }, - { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645, upload-time = "2024-11-27T22:38:15.843Z" }, - { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584, upload-time = "2024-11-27T22:38:17.645Z" }, - { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875, upload-time = "2024-11-27T22:38:19.159Z" }, - { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418, upload-time = "2024-11-27T22:38:20.064Z" }, - { url = "https://files.pythonhosted.org/packages/04/90/2ee5f2e0362cb8a0b6499dc44f4d7d48f8fff06d28ba46e6f1eaa61a1388/tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7", size = 132708, upload-time = "2024-11-27T22:38:21.659Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ec/46b4108816de6b385141f082ba99e315501ccd0a2ea23db4a100dd3990ea/tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c", size = 123582, upload-time = "2024-11-27T22:38:22.693Z" }, - { url = "https://files.pythonhosted.org/packages/a0/bd/b470466d0137b37b68d24556c38a0cc819e8febe392d5b199dcd7f578365/tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13", size = 232543, upload-time = "2024-11-27T22:38:24.367Z" }, - { url = "https://files.pythonhosted.org/packages/d9/e5/82e80ff3b751373f7cead2815bcbe2d51c895b3c990686741a8e56ec42ab/tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281", size = 241691, upload-time = "2024-11-27T22:38:26.081Z" }, - { url = "https://files.pythonhosted.org/packages/05/7e/2a110bc2713557d6a1bfb06af23dd01e7dde52b6ee7dadc589868f9abfac/tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272", size = 251170, upload-time = "2024-11-27T22:38:27.921Z" }, - { url = "https://files.pythonhosted.org/packages/64/7b/22d713946efe00e0adbcdfd6d1aa119ae03fd0b60ebed51ebb3fa9f5a2e5/tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140", size = 236530, upload-time = "2024-11-27T22:38:29.591Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/31/3a76f67da4b0cf37b742ca76beaf819dca0ebef26d78fc794a576e08accf/tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2", size = 258666, upload-time = "2024-11-27T22:38:30.639Z" }, - { url = "https://files.pythonhosted.org/packages/07/10/5af1293da642aded87e8a988753945d0cf7e00a9452d3911dd3bb354c9e2/tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744", size = 243954, upload-time = "2024-11-27T22:38:31.702Z" }, - { url = "https://files.pythonhosted.org/packages/5b/b9/1ed31d167be802da0fc95020d04cd27b7d7065cc6fbefdd2f9186f60d7bd/tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec", size = 98724, upload-time = "2024-11-27T22:38:32.837Z" }, - { url = "https://files.pythonhosted.org/packages/c7/32/b0963458706accd9afcfeb867c0f9175a741bf7b19cd424230714d722198/tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69", size = 109383, upload-time = "2024-11-27T22:38:34.455Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/19/75/241269d1da26b624c0d5e110e8149093c759b7a286138f4efd61a60e75fe/tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021", size = 7184, upload-time = "2025-01-15T12:07:24.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, ] [[package]] @@ -6952,6 +7258,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/d8/83790d67ec771bf029a45ff1bd1aedbb738d8aa58c09dd0cc3033eea0e69/types_setuptools-80.9.0.20250529-py3-none-any.whl", hash = "sha256:00dfcedd73e333a430e10db096e4d46af93faf9314f832f13b6bbe3d6757e95f", size = 63263, upload-time = "2025-05-29T03:07:33.064Z" }, ] +[[package]] +name = "types-tqdm" +version = "4.67.0.20250809" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/d0/cf498fc630d9fdaf2428b93e60b0e67b08008fec22b78716b8323cf644dc/types_tqdm-4.67.0.20250809.tar.gz", hash = "sha256:02bf7ab91256080b9c4c63f9f11b519c27baaf52718e5fdab9e9606da168d500", size = 17200, upload-time = "2025-08-09T03:17:43.489Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/13/3ff0781445d7c12730befce0fddbbc7a76e56eb0e7029446f2853238360a/types_tqdm-4.67.0.20250809-py3-none-any.whl", hash = "sha256:1a73053b31fcabf3c1f3e2a9d5ecdba0f301bde47a418cd0e0bdf774827c5c57", size = 24020, upload-time = "2025-08-09T03:17:42.453Z" }, +] + [[package]] name = "typing-extensions" version = "4.14.1" From b57ad2c355abe403af944d85e8a9949e452b6b43 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Tue, 18 Nov 2025 22:27:13 -0800 Subject: [PATCH 05/10] updates --- .../pytest/integrations/openenv_trl_vllm.py | 4 ++++ tests/pytest/test_openenv_echo_hub.py | 22 
++++++++++++++----- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py index 58d1fbd8..36d82376 100644 --- a/eval_protocol/pytest/integrations/openenv_trl_vllm.py +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -131,11 +131,15 @@ def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: flush=True, ) + # Import default logger for local tracing + from eval_protocol.dataset_logger import default_logger + config = RolloutProcessorConfig( completion_params=base_params, mcp_config_path="", semaphore=asyncio.Semaphore(max_concurrency), steps=max_steps, + logger=default_logger, ) # 3) Execute rollouts with VLLMPolicy diff --git a/tests/pytest/test_openenv_echo_hub.py b/tests/pytest/test_openenv_echo_hub.py index 7ddd2b8c..d252fc26 100644 --- a/tests/pytest/test_openenv_echo_hub.py +++ b/tests/pytest/test_openenv_echo_hub.py @@ -2,11 +2,11 @@ import os import re + from eval_protocol.models import EvaluationRow, Message, EvaluateResult from eval_protocol.pytest import evaluation_test from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor import pytest -import os # Skip these integration-heavy tests on CI runners by default pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") @@ -45,14 +45,21 @@ def action_parser(response_text: str): try: from envs.echo_env import EchoEnv # type: ignore + _HAS_ECHO = True except Exception: _HAS_ECHO = False +# Inline test data +ECHO_INLINE_DATA: List[Dict[str, Any]] = [ + {"id": "echo-1", "prompt": "hello"}, + {"id": "echo-2", "prompt": "test message"}, +] + + @evaluation_test( # type: ignore[misc] - input_dataset=["tests/pytest/data/echo_dataset.jsonl"], - dataset_adapter=echo_dataset_to_rows, + input_rows=[echo_dataset_to_rows(ECHO_INLINE_DATA)], completion_params=[ { "temperature": 0.0, @@ -93,8 +100,13 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: # Preferred path: system sentinel "__ep_step_rewards__" step_rewards: List[float] = [] for msg in row.messages or []: - if msg.role == "system" and isinstance(msg.content, str) and msg.content.startswith("__ep_step_rewards__:"): + if ( + msg.role == "system" + and isinstance(msg.content, str) + and msg.content.startswith("__ep_step_rewards__:") + ): import json as _json + payload = msg.content.split(":", 1)[1] step_rewards = _json.loads(payload) or [] break @@ -105,5 +117,3 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: score = max(0.0, min(1.0, total_reward)) row.evaluation_result = EvaluateResult(score=score, reason=f"Echo total reward={total_reward:.2f}") return row - - From 70f3d0eb2095869596b5def8e0a5fddd999e1e53 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Wed, 19 Nov 2025 11:36:12 +0000 Subject: [PATCH 06/10] final --- eval_protocol/mcp/execution/vllm_policy.py | 177 ++++++++ .../pytest/integrations/openenv_trl_vllm.py | 395 +++++++++++++----- .../pytest/openenv_rollout_processor.py | 32 +- tests/pytest/test_openenv_browsergym_eval.py | 2 +- tests/pytest/test_openenv_textarena_docker.py | 168 ++++++++ 5 files changed, 670 insertions(+), 104 deletions(-) create mode 100644 eval_protocol/mcp/execution/vllm_policy.py create mode 100644 tests/pytest/test_openenv_textarena_docker.py diff --git a/eval_protocol/mcp/execution/vllm_policy.py b/eval_protocol/mcp/execution/vllm_policy.py new file mode 100644 index 00000000..8a8f0d5c --- /dev/null +++ 
b/eval_protocol/mcp/execution/vllm_policy.py @@ -0,0 +1,177 @@ +""" +VLLMPolicy - Policy for TRL's VLLMClient + +Simple policy that calls TRL's vllm_client directly instead of going through LiteLLM. +Works with `trl vllm-serve` endpoints. +""" + +from typing import Any, Dict, List, Optional + + +class VLLMPolicy: + """ + Policy that uses TRL's VLLMClient for generation. + + This is designed to work with `trl vllm-serve` which provides + custom /generate/ and /chat/ endpoints. + """ + + def __init__( + self, + vllm_client, # trainer.vllm_client + tokenizer=None, # Optional tokenizer for decoding + temperature: float = 1.0, + max_tokens: int = 100, + top_p: Optional[float] = None, + top_k: Optional[int] = None, + **kwargs, + ): + """ + Initialize VLLMPolicy. + + Args: + vllm_client: TRL's VLLMClient instance (from trainer.vllm_client) + tokenizer: Optional tokenizer for decoding token IDs to text + temperature: Sampling temperature + max_tokens: Maximum tokens to generate + top_p: Top-p sampling + top_k: Top-k sampling + **kwargs: Additional generation parameters + """ + self.vllm_client = vllm_client + self.tokenizer = tokenizer + self.temperature = temperature + self.max_tokens = max_tokens + self.top_p = top_p if top_p is not None else 1.0 + self.top_k = top_k if top_k is not None else -1 + self.kwargs = kwargs + + async def _make_llm_call( + self, + messages: List[Dict[str, Any]], + tools: Optional[List] = None, + ) -> Dict[str, Any]: + """ + Make LLM call using TRL's VLLMClient. + + Args: + messages: List of message dicts with 'role' and 'content' + tools: Not used (for compatibility) + + Returns: + OpenAI-compatible response dict + """ + # Apply chat template to convert messages to a prompt string + if self.tokenizer is not None: + try: + # Use tokenizer's chat template + prompt_text = self.tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + tokenize=False, + ) + print("\n[VLLMPolicy] ===== CHAT TEMPLATE APPLIED =====", flush=True) + print(f"[VLLMPolicy] Input messages ({len(messages)} messages):", flush=True) + for i, msg in enumerate(messages): + content_preview = str(msg.get("content", ""))[:100] + print(f" [{i}] {msg.get('role', '?')}: {content_preview}...", flush=True) + print(f"[VLLMPolicy] Formatted prompt (length={len(prompt_text)}):", flush=True) + print("[VLLMPolicy] Prompt preview (last 500 chars):", flush=True) + print(f"{prompt_text[-500:]}", flush=True) + print("[VLLMPolicy] ===================================", flush=True) + except Exception as e: + print(f"[VLLMPolicy] Warning: Failed to apply chat template: {e}", flush=True) + # Fallback: simple concatenation + prompt_text = "\n".join(f"{m['role']}: {m['content']}" for m in messages) + else: + # No tokenizer: simple concatenation + prompt_text = "\n".join(f"{m['role']}: {m['content']}" for m in messages) + + # Check if vllm_client is VLLMClient (server mode) or LLM (colocate mode) + is_llm_object = hasattr(self.vllm_client, "llm_engine") # LLM has llm_engine + + if is_llm_object: + # Colocate mode: use SamplingParams + print("[VLLMPolicy] Using vLLM LLM (colocate mode) with SamplingParams", flush=True) + from vllm import SamplingParams + + sampling_params = SamplingParams( + temperature=self.temperature, + max_tokens=self.max_tokens, + top_p=self.top_p, + top_k=self.top_k, + n=1, + ) + + print("[VLLMPolicy] Calling LLM.generate()...", flush=True) + outputs = self.vllm_client.generate([prompt_text], sampling_params=sampling_params, use_tqdm=False) + + # Extract from vLLM output format + output = 
outputs[0] + prompt_ids = output.prompt_token_ids + completion_ids = output.outputs[0].token_ids + response = { + "prompt_ids": [prompt_ids], + "completion_ids": [completion_ids], + } + else: + # Server mode: use VLLMClient with kwargs + print("[VLLMPolicy] Using VLLMClient (server mode)", flush=True) + vllm_params = { + "temperature": self.temperature, + "max_tokens": self.max_tokens, + "top_p": self.top_p, + "top_k": self.top_k, + "n": 1, + } + vllm_params.update(self.kwargs) + + print("[VLLMPolicy] Calling vllm_client.generate()...", flush=True) + response = self.vllm_client.generate( + prompts=[prompt_text], + **vllm_params, + ) + + # Extract first result + prompt_ids = response["prompt_ids"][0] + completion_ids = response["completion_ids"][0] + + # Decode completion text if tokenizer available + if self.tokenizer is not None: + try: + completion_text = self.tokenizer.decode(completion_ids, skip_special_tokens=True) + print("\n[VLLMPolicy] ===== GENERATION RESULT =====", flush=True) + print(f"[VLLMPolicy] Prompt tokens: {len(prompt_ids)}", flush=True) + print(f"[VLLMPolicy] Completion tokens: {len(completion_ids)}", flush=True) + print(f"[VLLMPolicy] FULL decoded completion ({len(completion_text)} chars):", flush=True) + print("───────────────────────────────────────", flush=True) + print(f"{completion_text}", flush=True) + print("───────────────────────────────────────", flush=True) + print("[VLLMPolicy] ==============================", flush=True) + except Exception as e: + print(f"[VLLMPolicy] Warning: Failed to decode completion: {e}", flush=True) + completion_text = f"" + else: + # Fallback: just indicate number of tokens + completion_text = f"<{len(completion_ids)}_tokens>" + + # Convert to OpenAI-compatible format for compatibility with OpenEnvRolloutProcessor + # Also include raw token IDs for TRL integration (avoids double encoding) + return { + "choices": [ + { + "message": { + "content": completion_text, + "role": "assistant", + } + } + ], + "usage": { + "prompt_tokens": len(prompt_ids), + "completion_tokens": len(completion_ids), + "total_tokens": len(prompt_ids) + len(completion_ids), + }, + # Include raw token IDs for TRL (avoids re-encoding) + "prompt_ids": prompt_ids, + "completion_ids": completion_ids, + } diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py index 36d82376..29b5634a 100644 --- a/eval_protocol/pytest/integrations/openenv_trl_vllm.py +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -14,14 +14,19 @@ from __future__ import annotations import asyncio -import sys -from typing import Any, Callable, Dict, List, Optional, Type +import importlib +import inspect +import logging +from typing import Any, Callable, Dict, List, Optional, Type, cast -from eval_protocol.models import EvaluationRow, InputMetadata, Message +from eval_protocol.models import EvalMetadata, EvaluationRow, InputMetadata, Message from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig +logger = logging.getLogger(__name__) + + def create_openenv_vllm_rollout_func( env_factory: Callable[[], Any] | None, prompt_builder: Callable[[Any, int, list[str]], Any], @@ -35,6 +40,7 @@ def create_openenv_vllm_rollout_func( processor_cls: Optional[Type[Any]] = OpenEnvRolloutProcessor, processor_kwargs: Optional[Dict[str, Any]] = None, # Environment configuration + env_path: Optional[str] = None, env_client_cls: Optional[Type[Any]] = None, 
tasks: List[str] | None = None, task_var: Optional[str] = None, @@ -55,26 +61,18 @@ def create_openenv_vllm_rollout_func( """ Build a TRL-compatible ``rollout_func`` using vLLM inference with OpenEnv. - High-level: - - ``GRPOTrainer`` calls the returned ``rollout_func(prompts, trainer)`` - - For each prompt, we create ``num_generations`` evaluation rows - - ``OpenEnvRolloutProcessor`` runs BrowserGym-style episodes via Docker - - ``VLLMPolicy`` formats messages with the chat template and calls TRL's - vLLM server using ``trainer.vllm_client`` - - We accumulate tokens across all turns of an episode and sum rewards, - returning Wordle-style GRPO data. - - The environment side is configured via ``env_client_cls`` and the BrowserGym - parameters (``tasks``, ``miniwob_url``, ``docker_image``, etc.). + ``GRPOTrainer`` calls the returned ``rollout_func(prompts, trainer)``. + For each prompt we run one OpenEnv episode using ``OpenEnvRolloutProcessor`` + and return Wordle-style GRPO data (2D token lists + 1D rewards). """ - print(f"\n{'=' * 80}", flush=True) - print("[openenv_trl_vllm] create_openenv_vllm_rollout_func() CALLED", flush=True) - print(f" vllm_base_url: {vllm_base_url}", flush=True) - print(f" vllm_model: {vllm_model}", flush=True) - print(f" tasks: {tasks}", flush=True) - print(f" max_steps: {max_steps}", flush=True) - print(f"{'=' * 80}", flush=True) - sys.stdout.flush() + logger.info("create_openenv_vllm_rollout_func called") + logger.debug( + "vllm_base_url=%s, vllm_model=%s, tasks=%s, max_steps=%s", + vllm_base_url, + vllm_model, + tasks, + max_steps, + ) # Import VLLMPolicy from eval_protocol.mcp.execution.vllm_policy import VLLMPolicy @@ -86,28 +84,107 @@ def create_openenv_vllm_rollout_func( def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: """Execute rollouts via OpenEnv + vLLM and return GRPO-compatible results.""" - print("\n[OpenEnvVLLM] rollout_func called", flush=True) + logger.info("OpenEnv vLLM rollout_func called") # Extract args from trainer args = trainer.args processing_class = trainer.processing_class num_generations = getattr(args, "num_generations", 8) - print( - f"[OpenEnvVLLM] Received {len(prompts)} prompts, {num_generations} generations each", - flush=True, + eval_name = env_path or "openenv_browsergym_vllm_training" + logger.info( + "[OpenEnvVLLM] Received %d prompts (trainer.num_generations=%s)", + len(prompts), + num_generations, ) + logger.debug("[OpenEnvVLLM] Total rollouts to execute: %d", len(prompts)) + + # Optionally load rollout processor + eval function hints from an + # @evaluation_test via env_path + ep_rollout_processor = None + ep_rollout_processor_kwargs: Dict[str, Any] = {} + ep_mcp_config_path = "" + ep_eval_func = None + + if env_path: + logger.info("[OpenEnvVLLM] Loading evaluation test from env_path='%s'", env_path) + try: + module = importlib.import_module(env_path) + except Exception as e: + raise ImportError(f"Failed to import env module '{env_path}': {e}") from e + + candidate_tests = [ + obj for _, obj in inspect.getmembers(module) if callable(obj) and hasattr(obj, "__ep_params__") + ] + if not candidate_tests: + raise ValueError(f"No @evaluation_test functions found in '{env_path}'.") + + eval_func = candidate_tests[0] + ep_eval_func = eval_func # used later after rollouts complete + ep_params: Dict[str, Any] = getattr(eval_func, "__ep_params__", {}) + ep_rollout_processor = ep_params.get("rollout_processor") + ep_rollout_processor_kwargs = ep_params.get("rollout_processor_kwargs") or {} + ep_mcp_config_path = 
ep_params.get("mcp_config_path") or "" + logger.info( + "[OpenEnvVLLM] Loaded eval test '%s' with rollout_processor=%s", + getattr(eval_func, "__name__", str(eval_func)), + type(ep_rollout_processor).__name__, + ) + + # 1) Build evaluation rows with rollout_id for logging + import uuid + + # Generate unique IDs for this batch + def _gen_id(): + import random + + words = [ + "quick", + "lazy", + "happy", + "bright", + "calm", + "bold", + "wise", + "kind", + ] + return f"{random.choice(words)}-{random.choice(words)}-{random.randint(10, 99)}" - # 1) Build evaluation rows evaluation_rows: List[EvaluationRow] = [] - for prompt in prompts: - for gen_idx in range(num_generations): - row = EvaluationRow( - messages=[Message(role="user", content=prompt)], - input_metadata=InputMetadata(completion_params={}), - ) - row.input_metadata.generation_idx = gen_idx # type: ignore[attr-defined] - evaluation_rows.append(row) + for prompt_idx, prompt in enumerate(prompts): + # One evaluation row per incoming prompt. GRPOTrainer will handle + # grouping by `num_generations` at the trainer level; the custom + # rollout_func must return one set of tokens per prompt. + rollout_id = f"openenv_vllm_{uuid.uuid4().hex[:12]}" + row_id = _gen_id() + + row = EvaluationRow( + messages=[Message(role="user", content=prompt)], + input_metadata=InputMetadata( + row_id=row_id, # Required for ep logs UI! + completion_params={}, + ), + ) + row.execution_metadata.rollout_id = rollout_id # Required for ep logs! + + # Minimal eval_metadata so ep logs can group/display properly + row.eval_metadata = EvalMetadata( + name=eval_name, + description=None, + version="v1", + status=None, + num_runs=1, + aggregation_method="mean", + passed_threshold=None, + passed=None, + ) + + evaluation_rows.append(row) + + logger.debug( + "[OpenEnvVLLM] Created %d evaluation rows with rollout_ids and row_ids", + len(evaluation_rows), + ) # 2) Build processor config with VLLMPolicy # We'll pass trainer.vllm_client to VLLMPolicy @@ -119,16 +196,18 @@ def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: if completion_params: base_params.update(completion_params) - print( - f"[OpenEnvVLLM] Temperature={base_params['temperature']}, max_tokens={base_params['max_tokens']}", - flush=True, + logger.debug( + "[OpenEnvVLLM] Temperature=%s, max_tokens=%s", + base_params["temperature"], + base_params["max_tokens"], ) - print("[OpenEnvVLLM] Using TRL VLLMClient from trainer", flush=True) + logger.debug("[OpenEnvVLLM] Using TRL VLLMClient from trainer") max_concurrency = concurrency if concurrency is not None else getattr(args, "per_device_train_batch_size", 1) - print( - f"[OpenEnvVLLM] Max concurrency={max_concurrency}, max_steps={max_steps}", - flush=True, + logger.debug( + "[OpenEnvVLLM] Max concurrency=%s, max_steps=%s", + max_concurrency, + max_steps, ) # Import default logger for local tracing @@ -136,24 +215,34 @@ def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: config = RolloutProcessorConfig( completion_params=base_params, - mcp_config_path="", + mcp_config_path=ep_mcp_config_path or "", semaphore=asyncio.Semaphore(max_concurrency), steps=max_steps, logger=default_logger, + kwargs=ep_rollout_processor_kwargs, ) # 3) Execute rollouts with VLLMPolicy - print( - f"[OpenEnvVLLM] Instantiating processor: " - f"{processor_cls.__name__ if processor_cls else 'OpenEnvRolloutProcessor'}", - flush=True, + logger.debug( + "[OpenEnvVLLM] Instantiating processor: %s", + processor_cls.__name__ if processor_cls else "OpenEnvRolloutProcessor", ) 
- # Create policy factory that uses trainer's vllm_client + # Create policy factory that uses trainer's vllm_client or llm def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs): - """Factory that creates VLLMPolicy using trainer's vllm_client.""" + """Factory that creates VLLMPolicy using trainer's vllm_client or llm.""" + logger.debug( + "[VLLMPolicyFactory] Creating VLLMPolicy with temp=%s, max_tokens=%s", + temperature, + max_tokens, + ) + # Check for vllm_client (server mode) or llm (colocate mode) + vllm_client = getattr(trainer, "vllm_client", None) or getattr(trainer, "llm", None) + if vllm_client is None: + raise RuntimeError("Trainer has neither vllm_client (server mode) nor llm (colocate mode)") + return VLLMPolicy( - vllm_client=trainer.vllm_client, # Use trainer's vLLM client! + vllm_client=vllm_client, # Use trainer's vLLM client! tokenizer=processing_class, # Pass tokenizer for decoding temperature=temperature, max_tokens=max_tokens, @@ -164,6 +253,48 @@ def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs) Processor = processor_cls or OpenEnvRolloutProcessor _kwargs: Dict[str, Any] = dict(processor_kwargs or {}) + + # If env_path was provided and we found an OpenEnvRolloutProcessor in the + # evaluation test, seed processor kwargs from it so users can reuse the + # same environment configuration for training. + if env_path and isinstance(ep_rollout_processor, OpenEnvRolloutProcessor): + logger.debug( + "[OpenEnvVLLM] Seeding processor kwargs from evaluation_test rollout_processor", + ) + _kwargs.setdefault("env_factory", getattr(ep_rollout_processor, "_provided_env_factory", None)) + _kwargs.setdefault("env_client_cls", getattr(ep_rollout_processor, "_env_client_cls", None)) + _kwargs.setdefault("tasks", getattr(ep_rollout_processor, "_tasks", None)) + _kwargs.setdefault("task_var", getattr(ep_rollout_processor, "_task_var", None)) + _kwargs.setdefault("miniwob_url", getattr(ep_rollout_processor, "_miniwob_url", None)) + _kwargs.setdefault("docker_image", getattr(ep_rollout_processor, "_docker_image", None)) + _kwargs.setdefault("env_base_url", getattr(ep_rollout_processor, "_env_base_url", None)) + _kwargs.setdefault( + "request_timeout_s", + getattr(ep_rollout_processor, "_request_timeout_s", None), + ) + _kwargs.setdefault( + "default_headers", + getattr(ep_rollout_processor, "_default_headers", None), + ) + _kwargs.setdefault("provider", getattr(ep_rollout_processor, "_provider", None)) + _kwargs.setdefault("docker_port", getattr(ep_rollout_processor, "_docker_port", None)) + _kwargs.setdefault("env_vars", getattr(ep_rollout_processor, "_env_vars", None)) + _kwargs.setdefault("benchmark", getattr(ep_rollout_processor, "_benchmark", None)) + _kwargs.setdefault("headless", getattr(ep_rollout_processor, "_headless", None)) + _kwargs.setdefault( + "viewport_width", + getattr(ep_rollout_processor, "_viewport_width", None), + ) + _kwargs.setdefault( + "viewport_height", + getattr(ep_rollout_processor, "_viewport_height", None), + ) + _kwargs.setdefault("timeout_ms", getattr(ep_rollout_processor, "_timeout_ms", None)) + _kwargs.setdefault( + "num_generations", + getattr(ep_rollout_processor, "_num_generations", None), + ) + _kwargs.setdefault("env_factory", env_factory) _kwargs.setdefault("prompt_builder", prompt_builder) _kwargs.setdefault("action_parser", action_parser) @@ -179,9 +310,10 @@ def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs) offset = task_cycle_index % len(tasks) rotated_tasks 
= tasks[offset:] + tasks[:offset] task_cycle_index = (task_cycle_index + 1) % len(tasks) - print( - f"[OpenEnvVLLM] Task rotation offset={offset}, rotated={rotated_tasks}", - flush=True, + logger.debug( + "[OpenEnvVLLM] Task rotation offset=%s, rotated=%s", + offset, + rotated_tasks, ) _kwargs.setdefault("tasks", rotated_tasks) _kwargs.setdefault("task_var", task_var) @@ -202,58 +334,82 @@ def vllm_policy_factory(model, temperature, max_tokens, base_url=None, **kwargs) _kwargs.setdefault("num_generations", num_generations) processor = Processor(**_kwargs) - print("[OpenEnvVLLM] Processor instantiated successfully", flush=True) + logger.debug("[OpenEnvVLLM] Processor instantiated successfully") loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) try: - async def _run_all(): - tasks_list = processor(evaluation_rows, config) - return await asyncio.gather(*tasks_list) - - completed_rows = loop.run_until_complete(_run_all()) - print( - f"[OpenEnvVLLM] All rollouts completed: {len(completed_rows)} results", - flush=True, + async def _run_all() -> List[EvaluationRow]: + tasks_list: List[asyncio.Task[EvaluationRow]] = processor(evaluation_rows, config) + rows: List[EvaluationRow] = await asyncio.gather(*tasks_list) + + # Optionally run the @evaluation_test function on each row to + # populate evaluation_result (score/metrics) so the same + # reward logic can be reused across trainers. + if env_path and ep_eval_func is not None: + if inspect.iscoroutinefunction(ep_eval_func): + eval_tasks = [ep_eval_func(row) for row in rows] + rows = cast(List[EvaluationRow], await asyncio.gather(*eval_tasks)) + else: + rows = cast(List[EvaluationRow], [ep_eval_func(row) for row in rows]) + logger.info( + "[OpenEnvVLLM] Applied eval function to %d rows from env_path='%s'", + len(rows), + env_path, + ) + + return rows + + completed_rows: List[EvaluationRow] = loop.run_until_complete(_run_all()) + logger.info( + "[OpenEnvVLLM] All rollouts completed: %d results", + len(completed_rows), ) finally: loop.close() - # 4) Convert to Wordle-style format (no splitting) - # Each completed_row is one rollout with multiple turns - # We .extend() tokens across turns, then .append() per rollout - print( - f"[OpenEnvVLLM] Converting {len(completed_rows)} rollouts to TRL format", - flush=True, + # 4) Convert completed rows to TRL format (one episode per row) + logger.info( + "[OpenEnvVLLM] Converting %d completed rollouts to TRL format", + len(completed_rows), ) tokenizer = getattr(processing_class, "tokenizer", None) or processing_class - encode_fn = getattr(tokenizer, "encode", None) episode_prompt_ids: List[List[int]] = [] episode_completion_ids: List[List[int]] = [] episode_logprobs: List[List[float]] = [] step_rewards_all: List[List[float]] = [] + eval_scores: List[float] = [] for idx, row in enumerate(completed_rows): - # Accumulate tokens across all turns in this rollout - prompt_ids: List[int] = [] # .extend() for each turn - completion_ids: List[int] = [] # .extend() for each turn - logprobs: List[float] = [] # .extend() for each turn + logger.debug( + "[OpenEnvVLLM] Processing rollout %d/%d: %d messages", + idx + 1, + len(completed_rows), + len(row.messages), + ) + + # Prefer raw token IDs stored by the rollout processor in + # execution_metadata.extra to avoid any re-encoding. 
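+            # At this point the processor is expected to have left per-rollout
+            # token IDs on the row, roughly (shape assumed from the accessors
+            # below; missing or malformed metadata falls back to empty lists):
+            #
+            #     row.execution_metadata.extra = {
+            #         "prompt_ids": [1, 2, ...],
+            #         "completion_ids": [3, 4, ...],
+            #     }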
+ prompt_ids: List[int] = [] + completion_ids: List[int] = [] + logprobs: List[float] = [] # We don't currently track per-token logprobs rewards: List[float] = [] - # Go through all messages and accumulate tokens + try: + extra = getattr(row.execution_metadata, "extra", None) + if isinstance(extra, dict): + prompt_ids = list(extra.get("prompt_ids", []) or []) + completion_ids = list(extra.get("completion_ids", []) or []) + except Exception: + prompt_ids = [] + completion_ids = [] + + # Extract step rewards from the sentinel system message for msg in row.messages: - if msg.role == "user": - tokens = encode_fn(msg.content or "") if encode_fn else [] - prompt_ids.extend(tokens) # Accumulate user tokens - elif msg.role == "assistant": - tokens = encode_fn(msg.content or "") if encode_fn else [] - completion_ids.extend(tokens) # Accumulate assistant tokens - logprobs.extend([0.0] * len(tokens)) # Placeholder logprobs - elif msg.role == "system": - # Extract step rewards + if msg.role == "system": try: content = msg.content or "" if isinstance(content, str) and content.startswith("__ep_step_rewards__:"): @@ -264,44 +420,81 @@ async def _run_all(): except Exception: pass - # Fallback for rewards (if extra field exists via model_config extra="allow") - if not rewards: - try: - extra = getattr(row.execution_metadata, "extra", None) - if isinstance(extra, dict): - rewards = extra.get("step_rewards", []) or [] - except Exception: - pass - # Append accumulated tokens for this episode episode_prompt_ids.append(prompt_ids if prompt_ids else [0]) episode_completion_ids.append(completion_ids if completion_ids else [0]) episode_logprobs.append(logprobs if logprobs else [0.0]) step_rewards_all.append(rewards if rewards else [0.0]) + # Also capture evaluation_result.score if the evaluation_test + # populated it, so downstream trainers can reuse the exact same + # scoring logic as the eval harness. 
+ score_val = 0.0 + try: + if getattr(row, "evaluation_result", None) is not None: + score_attr = getattr(row.evaluation_result, "score", None) + if score_attr is not None: + score_val = float(score_attr) + except Exception: + score_val = 0.0 + eval_scores.append(score_val) + + ep_reward = sum(rewards) if rewards else 0.0 + logger.debug( + "[OpenEnvVLLM] Episode %d: prompt_tokens=%d, completion_tokens=%d, reward=%.3f", + idx + 1, + len(prompt_ids), + len(completion_ids), + ep_reward, + ) + total_reward = sum(sum(r) for r in step_rewards_all) avg_reward = total_reward / len(step_rewards_all) if step_rewards_all else 0.0 - print( - f"[OpenEnvVLLM] Total reward={total_reward:.2f}, Avg reward={avg_reward:.2f}", - flush=True, + logger.info( + "[OpenEnvVLLM] ✅ All rollouts complete | total_reward=%.2f, avg_reward=%.2f", + total_reward, + avg_reward, + ) + logger.info( + "[OpenEnvVLLM] Returning %d episodes to GRPO", + len(episode_prompt_ids), ) - print(f"[OpenEnvVLLM] Returning {len(episode_prompt_ids)} episodes", flush=True) - sys.stdout.flush() # Return in Wordle format # Tokens: 2D arrays (accumulate across turns, one list per episode) # Rewards: 1D arrays (one scalar per episode) total_rewards = [sum(r) for r in step_rewards_all] # Sum step rewards per episode - print(f"[OpenEnvVLLM] Episode rewards: {total_rewards}", flush=True) + logger.debug("[OpenEnvVLLM] Episode rewards: %s", total_rewards) + + # Validate token IDs before returning (sanity check only) + vocab_size = len(tokenizer) if hasattr(tokenizer, "__len__") else 200000 + logger.debug("[OpenEnvVLLM] Validating token IDs (vocab_size=%s)...", vocab_size) + for i, (pids, cids) in enumerate(zip(episode_prompt_ids, episode_completion_ids)): + max_p = max(pids) if pids else 0 + max_c = max(cids) if cids else 0 + if max_p >= vocab_size or max_c >= vocab_size: + logger.warning( + "[OpenEnvVLLM] Episode %d: INVALID TOKEN IDS (max_prompt_id=%s, max_completion_id=%s)", + i, + max_p, + max_c, + ) + logger.debug( + "[OpenEnvVLLM] Episode %d: prompt_len=%d, completion_len=%d, max_p_id=%d, max_c_id=%d", + i, + len(pids), + len(cids), + max_p, + max_c, + ) return { "prompt_ids": episode_prompt_ids, # List[List[int]] - tokens per episode "completion_ids": episode_completion_ids, # List[List[int]] - tokens per episode "logprobs": episode_logprobs, # List[List[float]] - logprobs per episode - "step_rewards": total_rewards, # List[float] - total reward per episode (1D!) + "eval_score": eval_scores, } - print(f"[openenv_trl_vllm] Returning rollout_func (type={type(rollout_func)})", flush=True) - sys.stdout.flush() + logger.debug("[openenv_trl_vllm] Returning rollout_func (type=%s)", type(rollout_func)) return rollout_func diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py index c3716c48..a2059ab0 100644 --- a/eval_protocol/pytest/openenv_rollout_processor.py +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -255,6 +255,9 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: } step_rewards = [] history: List[str] = [] + # Accumulate token IDs across all turns for training integrations + all_prompt_ids: List[int] = [] + all_completion_ids: List[int] = [] logger.info("[OpenEnvRolloutProcessor] Starting agent loop (max %d steps)", max_steps) @@ -295,7 +298,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: except Exception: pass - # Call model to generate action (LiteLLM handles multimodal!) 
+ # Call model to generate action (LiteLLM or custom policy) logger.debug("[OpenEnvRolloutProcessor] Calling LLM (messages=%d)", len(messages)) response = await policy._make_llm_call( messages=[msg.model_dump() for msg in messages], @@ -329,6 +332,15 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: # Add assistant message (original content) messages.append(Message(role="assistant", content=assistant_message)) + # Accumulate token IDs from this turn for downstream training + if "prompt_ids" in response and "completion_ids" in response: + try: + all_prompt_ids.extend(response["prompt_ids"]) + all_completion_ids.extend(response["completion_ids"]) + except Exception: + # Best-effort only; don't break rollouts if tokens are malformed + pass + # Execute action in environment (OpenEnv standard interface!) with transient-error retries logger.debug("[OpenEnvRolloutProcessor] Executing action in environment") step_attempts = 2 @@ -399,10 +411,25 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: ) row.execution_metadata.duration_seconds = time.perf_counter() - start_time - # Store rewards for TRL reward functions via a system message sentinel + # Store rewards for TRL reward functions sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) messages.append(Message(role="system", content=sentinel)) + # Attach accumulated token IDs to execution_metadata.extra for + # training integrations (e.g., TRL GRPO) instead of encoding + # them into synthetic system messages. + if all_prompt_ids or all_completion_ids: + try: + extra = getattr(row.execution_metadata, "extra", None) + if not isinstance(extra, dict): + extra = {} + extra["prompt_ids"] = list(all_prompt_ids) + extra["completion_ids"] = list(all_completion_ids) + row.execution_metadata.extra = extra # type: ignore[attr-defined] + except Exception: + # Non-fatal: training integrations can fall back if tokens are missing + pass + total_reward = sum(step_rewards) logger.info("[OpenEnvRolloutProcessor] ✅ ROLLOUT COMPLETE") logger.info("[OpenEnvRolloutProcessor] Steps: %d", len(step_rewards)) @@ -442,6 +469,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: env.close() logger.debug("[OpenEnvRolloutProcessor] Environment closed successfully") except Exception as close_err: + print(f"[OpenEnvRolloutProcessor] Warning: Error closing environment: {close_err}", flush=True) logger.warning( "[OpenEnvRolloutProcessor] Error closing environment: %s", close_err, diff --git a/tests/pytest/test_openenv_browsergym_eval.py b/tests/pytest/test_openenv_browsergym_eval.py index ac2c183f..8b87f423 100644 --- a/tests/pytest/test_openenv_browsergym_eval.py +++ b/tests/pytest/test_openenv_browsergym_eval.py @@ -245,7 +245,7 @@ def action_parser(response_text: str): env_client_cls=BrowserGymEnv if _HAS_BG else None, prompt_builder=prompt_builder, action_parser=action_parser, - tasks=["click-test"], + tasks=["click-test", "click-button"], task_var="BROWSERGYM_TASK_NAME", miniwob_url=os.getenv("MINIWOB_URL", "http://host.docker.internal:8888/miniwob/"), docker_image="browsergym-env:latest", diff --git a/tests/pytest/test_openenv_textarena_docker.py b/tests/pytest/test_openenv_textarena_docker.py new file mode 100644 index 00000000..7e02b190 --- /dev/null +++ b/tests/pytest/test_openenv_textarena_docker.py @@ -0,0 +1,168 @@ +from typing import Any, Dict, List +import os + +from eval_protocol.models import EvaluationRow, Message, EvaluateResult +from eval_protocol.pytest import evaluation_test +from 
eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor +import pytest + +# Skip these integration-heavy tests on CI runners by default +pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") + + +def textarena_dataset_to_rows(data: List[Dict[str, Any]]) -> List[EvaluationRow]: + """ + Adapter: simple {"id": "...", "prompt": "..."} to EvaluationRows. + """ + rows: List[EvaluationRow] = [] + for row in data: + prompt = str(row.get("prompt", "Let's play")) + rows.append(EvaluationRow(messages=[Message(role="user", content=prompt)])) + return rows + + +def prompt_builder(observation: Any, step: int, history: List[str]) -> str: + """ + Build prompt for TextArena games. + Extract the game prompt and recent messages. + """ + prompt_text = getattr(observation, "prompt", "") + messages = getattr(observation, "messages", []) + + # Format conversation history + history_lines = [] + for msg in messages[-5:]: # Last 5 messages + sender = getattr(msg, "sender_id", "?") + content = getattr(msg, "content", "") + category = getattr(msg, "category", "MESSAGE") + if content: + history_lines.append(f"[{category}] Player {sender}: {content}") + + history_str = "\n".join(history_lines) if history_lines else "[No messages yet]" + + return ( + f"Step {step}\n" + f"Game: {prompt_text}\n\n" + f"Conversation:\n{history_str}\n\n" + f"Your move (reply with your guess or action):" + ) + + +def action_parser(response_text: str): + """ + Convert raw model response to TextArenaAction. + """ + try: + from envs.textarena_env import TextArenaAction # type: ignore + except Exception: + pytest.skip("OpenEnv (envs.textarena_env) is not installed; skipping TextArena test.") + raise + + # Extract the actual guess/action from the response + text = response_text.strip() if isinstance(response_text, str) else "" + + # Try to extract text in brackets [guess] or quotes "guess" + import re + + bracket_match = re.search(r"\[([^\]]+)\]", text) + if bracket_match: + text = bracket_match.group(1).strip() + + return TextArenaAction(message=text or "pass") + + +try: + from envs.textarena_env import TextArenaEnv # type: ignore + + _HAS_TEXTARENA = True +except Exception: + _HAS_TEXTARENA = False + + +# Inline test data +TEXTARENA_INLINE_DATA: List[Dict[str, Any]] = [ + {"id": "wordle-1", "prompt": "Play Wordle"}, + {"id": "wordle-2", "prompt": "Play Wordle"}, + {"id": "wordle-3", "prompt": "Play Wordle"}, +] + + +@evaluation_test( # type: ignore[misc] + input_rows=[textarena_dataset_to_rows(TEXTARENA_INLINE_DATA)], + completion_params=[ + { + "temperature": 0.7, + "max_tokens": 32, + # Any working model with your API key + "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct", + } + ], + num_runs=1, + max_concurrent_rollouts=2, + mode="pointwise", + rollout_processor=( + OpenEnvRolloutProcessor( + # Use Docker image built from OpenEnv repo + env_client_cls=TextArenaEnv if _HAS_TEXTARENA else None, # type: ignore + docker_image=os.getenv("TEXTARENA_DOCKER_IMAGE", "textarena-env:latest"), + env_vars={ + "TEXTARENA_ENV_ID": os.getenv("TEXTARENA_ENV_ID", "Wordle-v0"), + "TEXTARENA_NUM_PLAYERS": os.getenv("TEXTARENA_NUM_PLAYERS", "1"), + # Optional: add TEXTARENA_KW_* for game-specific kwargs + # e.g., "TEXTARENA_KW_hardcore": "true" + }, + task_var="TEXTARENA_ENV_ID", # Env var for task selection + tasks=None, # Single task per container (set via TEXTARENA_ENV_ID) + prompt_builder=prompt_builder, + action_parser=action_parser, + timeout_ms=10000, + 
num_generations=1, + ) + if _HAS_TEXTARENA + else None + ), +) +def test_openenv_textarena_docker(row: EvaluationRow) -> EvaluationRow: + """ + Test TextArena (Wordle, GuessTheNumber, etc.) via Docker container. + + Build the image first: + cd /path/to/OpenEnv + docker build -f src/envs/textarena_env/server/Dockerfile -t textarena-env:latest . + + Run with: + TEXTARENA_ENV_ID=Wordle-v0 TEXTARENA_NUM_PLAYERS=1 \\ + FIREWORKS_API_KEY=$FIREWORKS_API_KEY \\ + pytest tests/pytest/test_openenv_textarena_docker.py -vv -s + + Or test other games: + TEXTARENA_ENV_ID=GuessTheNumber-v0 ... + """ + if not _HAS_TEXTARENA: + pytest.skip("OpenEnv (envs.textarena_env) is not installed; skipping TextArena Docker test.") + + # Extract step rewards and compute score + total_reward = 0.0 + try: + step_rewards: List[float] = [] + for msg in row.messages or []: + if ( + msg.role == "system" + and isinstance(msg.content, str) + and msg.content.startswith("__ep_step_rewards__:") + ): + import json + + payload = msg.content.split(":", 1)[1] + step_rewards = json.loads(payload) or [] + break + total_reward = float(sum(step_rewards)) if step_rewards else 0.0 + except Exception: + total_reward = 0.0 + + score = max(0.0, min(1.0, total_reward)) + row.evaluation_result = EvaluateResult( + score=score, + reason=f"TextArena total reward={total_reward:.2f} over {len(step_rewards) if 'step_rewards' in locals() else 0} steps", + ) + return row From a1a973ebf34745d6acaa5ef933308fa44c57617d Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Wed, 19 Nov 2025 11:53:27 +0000 Subject: [PATCH 07/10] finalll --- eval_protocol/mcp/execution/vllm_policy.py | 69 +++++++++++-------- .../pytest/integrations/openenv_trl_vllm.py | 20 +----- .../pytest/openenv_rollout_processor.py | 15 ++-- 3 files changed, 51 insertions(+), 53 deletions(-) diff --git a/eval_protocol/mcp/execution/vllm_policy.py b/eval_protocol/mcp/execution/vllm_policy.py index 8a8f0d5c..109d90ef 100644 --- a/eval_protocol/mcp/execution/vllm_policy.py +++ b/eval_protocol/mcp/execution/vllm_policy.py @@ -1,13 +1,20 @@ """ -VLLMPolicy - Policy for TRL's VLLMClient +VLLMPolicy - Policy for TRL's VLLMClient or colocated vLLM LLM. -Simple policy that calls TRL's vllm_client directly instead of going through LiteLLM. -Works with `trl vllm-serve` endpoints. +Thin adapter that turns Eval Protocol-style message lists into a single prompt, +then calls either: + +- TRL's VLLMClient (server mode), or +- a colocated vLLM LLM instance (SamplingParams mode). """ +import logging from typing import Any, Dict, List, Optional +logger = logging.getLogger(__name__) + + class VLLMPolicy: """ Policy that uses TRL's VLLMClient for generation. @@ -52,7 +59,7 @@ async def _make_llm_call( tools: Optional[List] = None, ) -> Dict[str, Any]: """ - Make LLM call using TRL's VLLMClient. + Make LLM call using TRL's VLLMClient or a colocated vLLM LLM. 
Args: messages: List of message dicts with 'role' and 'content' @@ -70,29 +77,29 @@ async def _make_llm_call( add_generation_prompt=True, tokenize=False, ) - print("\n[VLLMPolicy] ===== CHAT TEMPLATE APPLIED =====", flush=True) - print(f"[VLLMPolicy] Input messages ({len(messages)} messages):", flush=True) - for i, msg in enumerate(messages): - content_preview = str(msg.get("content", ""))[:100] - print(f" [{i}] {msg.get('role', '?')}: {content_preview}...", flush=True) - print(f"[VLLMPolicy] Formatted prompt (length={len(prompt_text)}):", flush=True) - print("[VLLMPolicy] Prompt preview (last 500 chars):", flush=True) - print(f"{prompt_text[-500:]}", flush=True) - print("[VLLMPolicy] ===================================", flush=True) + logger.debug( + "[VLLMPolicy] Chat template applied for %d messages (prompt length=%d)", + len(messages), + len(prompt_text), + ) except Exception as e: - print(f"[VLLMPolicy] Warning: Failed to apply chat template: {e}", flush=True) - # Fallback: simple concatenation - prompt_text = "\n".join(f"{m['role']}: {m['content']}" for m in messages) + logger.warning( + "[VLLMPolicy] Failed to apply chat template: %s", + e, + exc_info=True, + ) + # Fallback: simple concatenation (defensive .get access) + prompt_text = "\n".join(f"{m.get('role', '?')}: {m.get('content', '')}" for m in messages) else: # No tokenizer: simple concatenation - prompt_text = "\n".join(f"{m['role']}: {m['content']}" for m in messages) + prompt_text = "\n".join(f"{m.get('role', '?')}: {m.get('content', '')}" for m in messages) # Check if vllm_client is VLLMClient (server mode) or LLM (colocate mode) is_llm_object = hasattr(self.vllm_client, "llm_engine") # LLM has llm_engine if is_llm_object: # Colocate mode: use SamplingParams - print("[VLLMPolicy] Using vLLM LLM (colocate mode) with SamplingParams", flush=True) + logger.debug("[VLLMPolicy] Using vLLM LLM (colocate mode) with SamplingParams") from vllm import SamplingParams sampling_params = SamplingParams( @@ -103,7 +110,7 @@ async def _make_llm_call( n=1, ) - print("[VLLMPolicy] Calling LLM.generate()...", flush=True) + logger.debug("[VLLMPolicy] Calling LLM.generate()") outputs = self.vllm_client.generate([prompt_text], sampling_params=sampling_params, use_tqdm=False) # Extract from vLLM output format @@ -116,7 +123,7 @@ async def _make_llm_call( } else: # Server mode: use VLLMClient with kwargs - print("[VLLMPolicy] Using VLLMClient (server mode)", flush=True) + logger.debug("[VLLMPolicy] Using VLLMClient (server mode)") vllm_params = { "temperature": self.temperature, "max_tokens": self.max_tokens, @@ -126,7 +133,7 @@ async def _make_llm_call( } vllm_params.update(self.kwargs) - print("[VLLMPolicy] Calling vllm_client.generate()...", flush=True) + logger.debug("[VLLMPolicy] Calling vllm_client.generate()") response = self.vllm_client.generate( prompts=[prompt_text], **vllm_params, @@ -140,16 +147,18 @@ async def _make_llm_call( if self.tokenizer is not None: try: completion_text = self.tokenizer.decode(completion_ids, skip_special_tokens=True) - print("\n[VLLMPolicy] ===== GENERATION RESULT =====", flush=True) - print(f"[VLLMPolicy] Prompt tokens: {len(prompt_ids)}", flush=True) - print(f"[VLLMPolicy] Completion tokens: {len(completion_ids)}", flush=True) - print(f"[VLLMPolicy] FULL decoded completion ({len(completion_text)} chars):", flush=True) - print("───────────────────────────────────────", flush=True) - print(f"{completion_text}", flush=True) - print("───────────────────────────────────────", flush=True) - print("[VLLMPolicy] 
==============================", flush=True) + logger.debug( + "[VLLMPolicy] Generation result: prompt_tokens=%d, completion_tokens=%d, completion_chars=%d", + len(prompt_ids), + len(completion_ids), + len(completion_text), + ) except Exception as e: - print(f"[VLLMPolicy] Warning: Failed to decode completion: {e}", flush=True) + logger.warning( + "[VLLMPolicy] Failed to decode completion: %s", + e, + exc_info=True, + ) completion_text = f"" else: # Fallback: just indicate number of tokens diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py index 29b5634a..eb6b9f68 100644 --- a/eval_protocol/pytest/integrations/openenv_trl_vllm.py +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -134,34 +134,18 @@ def rollout_func(prompts: List[str], trainer) -> Dict[str, List]: # 1) Build evaluation rows with rollout_id for logging import uuid - # Generate unique IDs for this batch - def _gen_id(): - import random - - words = [ - "quick", - "lazy", - "happy", - "bright", - "calm", - "bold", - "wise", - "kind", - ] - return f"{random.choice(words)}-{random.choice(words)}-{random.randint(10, 99)}" - evaluation_rows: List[EvaluationRow] = [] for prompt_idx, prompt in enumerate(prompts): # One evaluation row per incoming prompt. GRPOTrainer will handle # grouping by `num_generations` at the trainer level; the custom # rollout_func must return one set of tokens per prompt. rollout_id = f"openenv_vllm_{uuid.uuid4().hex[:12]}" - row_id = _gen_id() row = EvaluationRow( messages=[Message(role="user", content=prompt)], input_metadata=InputMetadata( - row_id=row_id, # Required for ep logs UI! + # Let Eval Protocol generate a stable row_id from content. + row_id=None, completion_params={}, ), ) diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py index a2059ab0..5cdaa112 100644 --- a/eval_protocol/pytest/openenv_rollout_processor.py +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -15,6 +15,7 @@ import asyncio import logging import time +from itertools import count from typing import List, Any, Dict, Callable, Generic, TypeVar, Optional, Type import json @@ -142,7 +143,9 @@ def __init__( self._viewport_height = viewport_height self._timeout_ms = timeout_ms self._num_generations = max(1, int(num_generations)) if num_generations else 1 - self._env_create_idx: int = 0 + # Counter used for task rotation when creating environments. Uses + # itertools.count to avoid race conditions across concurrent rollouts. + self._env_create_counter = count() if self._tasks and not self._task_var: raise ValueError("task_var must be provided when tasks are configured.") @@ -411,7 +414,9 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: ) row.execution_metadata.duration_seconds = time.perf_counter() - start_time - # Store rewards for TRL reward functions + # Store per-step rewards in a sentinel system message so + # evaluation tests and downstream integrations can reconstruct + # episode rewards. 
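+        # The sentinel is an ordinary system message whose content looks like,
+        # for example:
+        #
+        #     "__ep_step_rewards__:[0.0, 1.0, 0.0]"
+        #
+        # so consumers split on the first ":" and json.loads() the remainder.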
sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) messages.append(Message(role="system", content=sentinel)) @@ -469,7 +474,6 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: env.close() logger.debug("[OpenEnvRolloutProcessor] Environment closed successfully") except Exception as close_err: - print(f"[OpenEnvRolloutProcessor] Warning: Error closing environment: {close_err}", flush=True) logger.warning( "[OpenEnvRolloutProcessor] Error closing environment: %s", close_err, @@ -534,8 +538,9 @@ def _generic_factory(): # Select task for this env instance (if provided), grouped by num_generations selected_task: Optional[str] = None if self._tasks: - idx = self._env_create_idx - self._env_create_idx = idx + 1 + # Use a monotonic counter so concurrent environment creation + # does not reuse the same index across rollouts. + idx = next(self._env_create_counter) group = idx // max(1, self._num_generations) selected_task = self._tasks[group % len(self._tasks)] if not self._task_var: From 23ba2b3a13a699b497a821be50763b01a683ccb6 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Thu, 20 Nov 2025 06:13:02 +0000 Subject: [PATCH 08/10] updates --- .../pytest/integrations/openenv_trl_vllm.py | 15 +------ .../pytest/openenv_rollout_processor.py | 33 +++++++-------- tests/pytest/test_openenv_browsergym_eval.py | 17 +++----- tests/pytest/test_openenv_echo_hub.py | 42 +++++++++---------- tests/pytest/test_openenv_textarena_docker.py | 18 +++----- 5 files changed, 48 insertions(+), 77 deletions(-) diff --git a/eval_protocol/pytest/integrations/openenv_trl_vllm.py b/eval_protocol/pytest/integrations/openenv_trl_vllm.py index eb6b9f68..3f204680 100644 --- a/eval_protocol/pytest/integrations/openenv_trl_vllm.py +++ b/eval_protocol/pytest/integrations/openenv_trl_vllm.py @@ -387,22 +387,11 @@ async def _run_all() -> List[EvaluationRow]: if isinstance(extra, dict): prompt_ids = list(extra.get("prompt_ids", []) or []) completion_ids = list(extra.get("completion_ids", []) or []) + rewards = [float(r) for r in (extra.get("step_rewards", []) or [])] except Exception: prompt_ids = [] completion_ids = [] - - # Extract step rewards from the sentinel system message - for msg in row.messages: - if msg.role == "system": - try: - content = msg.content or "" - if isinstance(content, str) and content.startswith("__ep_step_rewards__:"): - import json - - payload = content.split(":", 1)[1] - rewards = json.loads(payload) or [] - except Exception: - pass + rewards = [] # Append accumulated tokens for this episode episode_prompt_ids.append(prompt_ids if prompt_ids else [0]) diff --git a/eval_protocol/pytest/openenv_rollout_processor.py b/eval_protocol/pytest/openenv_rollout_processor.py index 5cdaa112..c1ce7769 100644 --- a/eval_protocol/pytest/openenv_rollout_processor.py +++ b/eval_protocol/pytest/openenv_rollout_processor.py @@ -17,7 +17,6 @@ import time from itertools import count from typing import List, Any, Dict, Callable, Generic, TypeVar, Optional, Type -import json from openai.types import CompletionUsage @@ -414,26 +413,22 @@ async def process_row(row: EvaluationRow) -> EvaluationRow: ) row.execution_metadata.duration_seconds = time.perf_counter() - start_time - # Store per-step rewards in a sentinel system message so - # evaluation tests and downstream integrations can reconstruct - # episode rewards. 
- sentinel = "__ep_step_rewards__:" + json.dumps(step_rewards) - messages.append(Message(role="system", content=sentinel)) - - # Attach accumulated token IDs to execution_metadata.extra for - # training integrations (e.g., TRL GRPO) instead of encoding - # them into synthetic system messages. - if all_prompt_ids or all_completion_ids: - try: - extra = getattr(row.execution_metadata, "extra", None) - if not isinstance(extra, dict): - extra = {} + # Attach per-step rewards and accumulated token IDs to + # execution_metadata.extra for downstream integrations + # (for example, TRL GRPO) instead of encoding them into + # synthetic system messages. + try: + extra = getattr(row.execution_metadata, "extra", None) + if not isinstance(extra, dict): + extra = {} + extra["step_rewards"] = list(step_rewards) + if all_prompt_ids or all_completion_ids: extra["prompt_ids"] = list(all_prompt_ids) extra["completion_ids"] = list(all_completion_ids) - row.execution_metadata.extra = extra # type: ignore[attr-defined] - except Exception: - # Non-fatal: training integrations can fall back if tokens are missing - pass + row.execution_metadata.extra = extra # type: ignore[attr-defined] + except Exception: + # Non-fatal: callers can fall back if metadata is missing + pass total_reward = sum(step_rewards) logger.info("[OpenEnvRolloutProcessor] ✅ ROLLOUT COMPLETE") diff --git a/tests/pytest/test_openenv_browsergym_eval.py b/tests/pytest/test_openenv_browsergym_eval.py index 8b87f423..505336ae 100644 --- a/tests/pytest/test_openenv_browsergym_eval.py +++ b/tests/pytest/test_openenv_browsergym_eval.py @@ -275,20 +275,13 @@ def test_openenv_browsergym_eval(row: EvaluationRow) -> EvaluationRow: """ if not _HAS_BG: pytest.skip("OpenEnv (envs.browsergym_env) is not installed; skipping BrowserGym test.") - # Extract step rewards from the sentinel system message injected by the rollout processor + # Extract step rewards from execution metadata (set by OpenEnvRolloutProcessor) step_rewards: List[float] = [] try: - for msg in row.messages or []: - if ( - msg.role == "system" - and isinstance(msg.content, str) - and msg.content.startswith("__ep_step_rewards__:") - ): - import json as _json - - payload = msg.content.split(":", 1)[1] - step_rewards = _json.loads(payload) or [] - break + extra = getattr(row.execution_metadata, "extra", None) + if isinstance(extra, dict): + raw = extra.get("step_rewards") or [] + step_rewards = [float(r) for r in raw] except Exception: step_rewards = [] diff --git a/tests/pytest/test_openenv_echo_hub.py b/tests/pytest/test_openenv_echo_hub.py index d252fc26..fab04554 100644 --- a/tests/pytest/test_openenv_echo_hub.py +++ b/tests/pytest/test_openenv_echo_hub.py @@ -8,6 +8,14 @@ from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor import pytest +try: + # Preferred import when using the monolithic `openenv` package + from openenv.envs.echo_env import EchoEnv # type: ignore + + _HAS_ECHO = True +except Exception: + _HAS_ECHO = False + # Skip these integration-heavy tests on CI runners by default pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") @@ -35,20 +43,20 @@ def action_parser(response_text: str): Convert raw model response to EchoAction. 
""" try: - from envs.echo_env import EchoAction # type: ignore + from openenv.envs.echo_env import EchoAction # type: ignore except Exception: - pytest.skip("OpenEnv (envs.echo_env) is not installed; skipping Echo hub test.") + pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.") raise text = response_text.strip() if isinstance(response_text, str) else "" return EchoAction(message=text or "hello") -try: - from envs.echo_env import EchoEnv # type: ignore +# try: +# from envs.echo_env import EchoEnv # type: ignore - _HAS_ECHO = True -except Exception: - _HAS_ECHO = False +# _HAS_ECHO = True +# except Exception: +# _HAS_ECHO = False # Inline test data @@ -93,23 +101,15 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: Extracts env rewards (from rollout policy extras) and sets evaluation_result. """ if not _HAS_ECHO: - pytest.skip("OpenEnv (envs.echo_env) is not installed; skipping Echo hub test.") - # Try to read rewards/usage left in execution metadata extra or system messages. + pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.") + # Try to read rewards/usage left in execution metadata extra. total_reward = 0.0 try: - # Preferred path: system sentinel "__ep_step_rewards__" + extra = getattr(row.execution_metadata, "extra", None) step_rewards: List[float] = [] - for msg in row.messages or []: - if ( - msg.role == "system" - and isinstance(msg.content, str) - and msg.content.startswith("__ep_step_rewards__:") - ): - import json as _json - - payload = msg.content.split(":", 1)[1] - step_rewards = _json.loads(payload) or [] - break + if isinstance(extra, dict): + raw = extra.get("step_rewards") or [] + step_rewards = [float(r) for r in raw] total_reward = float(sum(step_rewards)) if step_rewards else 0.0 except Exception: total_reward = 0.0 diff --git a/tests/pytest/test_openenv_textarena_docker.py b/tests/pytest/test_openenv_textarena_docker.py index 7e02b190..b752cb91 100644 --- a/tests/pytest/test_openenv_textarena_docker.py +++ b/tests/pytest/test_openenv_textarena_docker.py @@ -144,25 +144,19 @@ def test_openenv_textarena_docker(row: EvaluationRow) -> EvaluationRow: # Extract step rewards and compute score total_reward = 0.0 try: + extra = getattr(row.execution_metadata, "extra", None) step_rewards: List[float] = [] - for msg in row.messages or []: - if ( - msg.role == "system" - and isinstance(msg.content, str) - and msg.content.startswith("__ep_step_rewards__:") - ): - import json - - payload = msg.content.split(":", 1)[1] - step_rewards = json.loads(payload) or [] - break + if isinstance(extra, dict): + raw = extra.get("step_rewards") or [] + step_rewards = [float(r) for r in raw] total_reward = float(sum(step_rewards)) if step_rewards else 0.0 except Exception: total_reward = 0.0 score = max(0.0, min(1.0, total_reward)) + steps = len(step_rewards) if "step_rewards" in locals() else 0 row.evaluation_result = EvaluateResult( score=score, - reason=f"TextArena total reward={total_reward:.2f} over {len(step_rewards) if 'step_rewards' in locals() else 0} steps", + reason=f"TextArena total reward={total_reward:.2f} over {steps} steps", ) return row From 9766c5dc8570c0100767ff11129af52b6ef76e63 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Wed, 19 Nov 2025 22:48:25 -0800 Subject: [PATCH 09/10] reward --- eval_protocol/models.py | 8 ++++++++ tests/pytest/test_openenv_echo_hub.py | 17 ++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/eval_protocol/models.py 
b/eval_protocol/models.py index 67d287ba..b2f9af8a 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -776,6 +776,14 @@ class ExecutionMetadata(BaseModel): description="Processing duration in seconds for an entire experiment. Note that includes time it took for retries.", ) + # Generic bag for integration-specific metadata. + # Examples: + # - OpenEnvRolloutProcessor: per-step rewards, token IDs for GRPO / TRL + extra: Optional[Dict[str, Any]] = Field( + default=None, + description="Arbitrary execution metadata for integrations (step rewards, token IDs, debug info, etc.).", + ) + class EvaluationRow(BaseModel): """ diff --git a/tests/pytest/test_openenv_echo_hub.py b/tests/pytest/test_openenv_echo_hub.py index fab04554..ae9c2cdc 100644 --- a/tests/pytest/test_openenv_echo_hub.py +++ b/tests/pytest/test_openenv_echo_hub.py @@ -8,13 +8,10 @@ from eval_protocol.pytest.openenv_rollout_processor import OpenEnvRolloutProcessor import pytest -try: - # Preferred import when using the monolithic `openenv` package - from openenv.envs.echo_env import EchoEnv # type: ignore - _HAS_ECHO = True -except Exception: - _HAS_ECHO = False +# Preferred import when using the monolithic `openenv` package +from envs.echo_env import EchoEnv # type: ignore + # Skip these integration-heavy tests on CI runners by default pytestmark = pytest.mark.skipif(os.getenv("CI") == "true", reason="Skip OpenEnv integration tests on CI") @@ -43,7 +40,7 @@ def action_parser(response_text: str): Convert raw model response to EchoAction. """ try: - from openenv.envs.echo_env import EchoAction # type: ignore + from envs.echo_env import EchoAction # type: ignore except Exception: pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.") raise @@ -91,8 +88,6 @@ def action_parser(response_text: str): timeout_ms=5000, num_generations=1, ) - if _HAS_ECHO - else None ), ) def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: @@ -100,8 +95,7 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: Smoke test for Echo env via Hugging Face Hub (registry.hf.space/openenv-echo-env). Extracts env rewards (from rollout policy extras) and sets evaluation_result. """ - if not _HAS_ECHO: - pytest.skip("OpenEnv (openenv.envs.echo_env) is not installed; skipping Echo hub test.") + # Try to read rewards/usage left in execution metadata extra. total_reward = 0.0 try: @@ -110,6 +104,7 @@ def test_openenv_echo_hub(row: EvaluationRow) -> EvaluationRow: if isinstance(extra, dict): raw = extra.get("step_rewards") or [] step_rewards = [float(r) for r in raw] + print(f"Step rewards: {step_rewards}") total_reward = float(sum(step_rewards)) if step_rewards else 0.0 except Exception: total_reward = 0.0 From 707f7cd295505dd36ecf1f9eac5377100aed4a74 Mon Sep 17 00:00:00 2001 From: Shrey Modi Date: Thu, 20 Nov 2025 07:06:03 +0000 Subject: [PATCH 10/10] lint --- eval_protocol/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 06481f39..be964b1a 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -782,6 +782,8 @@ class ExecutionMetadata(BaseModel): extra: Optional[Dict[str, Any]] = Field( default=None, description="Arbitrary execution metadata for integrations (step rewards, token IDs, debug info, etc.).", + ) + finish_reason: Optional[str] = Field( default=None, description="finish_reason reported by the completion response for this row.",
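
A minimal sketch of how trainer-side code might consume the ExecutionMetadata.extra
fields populated by OpenEnvRolloutProcessor. Only the field names come from the
diffs above; the helper functions themselves are illustrative assumptions, not
part of this series.

    from typing import List, Tuple

    from eval_protocol.models import EvaluationRow


    def episode_reward(row: EvaluationRow) -> float:
        """Sum the per-step env rewards recorded by the rollout processor (0.0 if absent)."""
        extra = getattr(row.execution_metadata, "extra", None) or {}
        step_rewards: List[float] = [float(r) for r in (extra.get("step_rewards") or [])]
        return float(sum(step_rewards))


    def episode_token_ids(row: EvaluationRow) -> Tuple[List[int], List[int]]:
        """Return (prompt_ids, completion_ids) accumulated across turns; empty if not recorded."""
        extra = getattr(row.execution_metadata, "extra", None) or {}
        return list(extra.get("prompt_ids") or []), list(extra.get("completion_ids") or [])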