-
Notifications
You must be signed in to change notification settings - Fork 0
feat: Add Logfire error tracking to fleet env #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
272b0b3
216d16a
34dd0d9
03fbb92
9d12c3e
0bc9cfd
b061b4e
d8c5ddc
e627cd0
73b81b5
1e9bfce
f4ed59b
327c782
887fd1f
84de403
77b9d6a
540530a
199f67f
33d53c9
0d37811
f86fa49
290600e
c99c1e5
fc0508f
31fa602
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
|
|
||
| from .mcp_tools import FleetMCPTools | ||
| from .models import CallToolAction, ListToolsAction | ||
| from .telemetry import fleet_error, fleet_warning, fleet_info | ||
|
|
||
|
|
||
| class FleetEnvClient(HTTPEnvClient[Action, Observation]): | ||
|
|
@@ -50,12 +51,12 @@ def from_fleet( | |
| cls: Type["FleetEnvClient"], | ||
| api_key: str, | ||
| env_key: str, | ||
| data_key: str, | ||
| data_version: str, | ||
| image_type: str, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Breaking API change leaves existing callers without required args (High Severity)
Additional Locations (1) |
||
| region: Optional[str] = None, | ||
| ttl_seconds: Optional[int] = 3600, | ||
| env_variables: Optional[Dict[str, Any]] = None, | ||
| image_type: Optional[str] = None, | ||
| data_key: Optional[str] = None, | ||
| data_version: Optional[str] = None, | ||
| **kwargs: Any, | ||
| ) -> Tuple["FleetEnvClient", FleetMCPTools]: | ||
| try: | ||
|
|
@@ -80,6 +81,7 @@ def from_fleet( | |
|
|
||
| import time | ||
| import logging | ||
|
|
||
| _logger = logging.getLogger(__name__) | ||
|
|
||
| _logger.info(f"Creating Fleet instance: env_key={env_key}, ttl={ttl_seconds}s") | ||
|
|
@@ -92,12 +94,14 @@ def from_fleet( | |
|
|
||
| for attempt in range(max_retries): | ||
| try: | ||
| # Fleet SDK expects image_type=None for standard images | ||
| sdk_image_type = image_type if image_type == "mcp" else None | ||
| env = fleet.make( | ||
| env_key=env_key, | ||
| region=region, | ||
| ttl_seconds=ttl_seconds, | ||
| env_variables=env_variables, | ||
| image_type=image_type, | ||
| image_type=sdk_image_type, | ||
| data_key=data_key_spec, | ||
| ) | ||
| break # Success | ||
|
|
@@ -114,18 +118,166 @@ def from_fleet( | |
| f"[env={env_key}] Fleet.make() failed (attempt {attempt + 1}/{max_retries}): {e}. " | ||
| f"Retrying in {delay:.1f}s..." | ||
| ) | ||
| fleet_warning( | ||
| "fleet_make_retry", | ||
| attempt=attempt + 1, | ||
| max_retries=max_retries, | ||
| error_type=type(e).__name__, | ||
| error_message=str(e), | ||
| retry_delay_s=delay, | ||
| ) | ||
| time.sleep(delay) | ||
| else: | ||
| _logger.error( | ||
| f"[env={env_key}] Fleet.make() failed after {attempt + 1} attempt(s): {e}" | ||
| ) | ||
| fleet_error( | ||
| "fleet_make_failed", | ||
| attempt=attempt + 1, | ||
| max_retries=max_retries, | ||
| error_type=type(e).__name__, | ||
| error_message=str(e), | ||
| ) | ||
| raise | ||
|
|
||
| _logger.info(f"Fleet instance ready in {time.time() - start:.1f}s: {env.instance_id}") | ||
| elapsed = time.time() - start | ||
| instance_id = getattr(env, "instance_id", "unknown") | ||
| _logger.info(f"Fleet instance ready in {elapsed:.1f}s: {instance_id}") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sync
|
||
|
|
||
| root = env.urls.root | ||
| # Fleet currently exposes multiple MCP endpoints. Prefer /api/v1/mcp first. | ||
| mcp_urls = (f"{root}api/v1/mcp", f"{root}mcp") | ||
| # Pick MCP endpoint based on modality: | ||
| # - computer_use: aggregator on port 8081 (has computer tool + API tools) | ||
| # - tool_use: per-env MCP server on port 3003 (API tools only) | ||
| if image_type == "mcp": | ||
| mcp_urls = (f"{root}api/v1/mcp",) | ||
| else: | ||
| mcp_urls = (f"{root}mcp",) | ||
|
|
||
| orch = cls( | ||
| base_url=env.urls.manager.api, | ||
| fleet_env_handle=env, | ||
| api_key=api_key, | ||
| mcp_urls=mcp_urls, | ||
| **kwargs, | ||
| ) | ||
| tools = FleetMCPTools(api_key=api_key, mcp_urls=mcp_urls) | ||
| return orch, tools | ||
|
|
||
| @classmethod | ||
| async def from_fleet_async( | ||
| cls: Type["FleetEnvClient"], | ||
| api_key: str, | ||
| env_key: str, | ||
| data_key: str, | ||
| data_version: str, | ||
| image_type: str, | ||
| region: Optional[str] = None, | ||
| ttl_seconds: Optional[int] = 3600, | ||
| env_variables: Optional[Dict[str, Any]] = None, | ||
| **kwargs: Any, | ||
| ) -> Tuple["FleetEnvClient", FleetMCPTools]: | ||
| """Async version of from_fleet() — does not block the event loop. | ||
|
|
||
| Uses AsyncFleet.make() for provisioning and asyncio.sleep() for retries, | ||
| allowing other async trajectories to progress while waiting. | ||
| """ | ||
| try: | ||
| from fleet._async import AsyncFleet | ||
| except ImportError as e: | ||
| raise ImportError( | ||
| "Fleet support requires the optional dependency set. " | ||
| "Install with `pip install openenv[fleet]`." | ||
| ) from e | ||
|
|
||
| async_fleet = AsyncFleet(api_key=api_key) | ||
|
|
||
| # Fleet SDK expects data_key in "key:version" format | ||
| data_key_spec = None | ||
| if data_key: | ||
| if data_version: | ||
| data_key_spec = f"{data_key}:{data_version}" | ||
| else: | ||
| data_key_spec = data_key | ||
|
|
||
| import time | ||
| import logging | ||
|
|
||
| _logger = logging.getLogger(__name__) | ||
|
|
||
| _logger.info(f"Creating Fleet instance (async): env_key={env_key}, ttl={ttl_seconds}s") | ||
| start = time.time() | ||
|
|
||
| # Retry logic with async sleep (non-blocking) | ||
| max_retries = 3 | ||
| retry_base_delay = 2.0 # seconds | ||
| env = None | ||
|
|
||
| # Fleet SDK expects image_type=None for standard images | ||
| sdk_image_type = image_type if image_type == "mcp" else None | ||
|
|
||
| for attempt in range(max_retries): | ||
| try: | ||
| env = await async_fleet.make( | ||
| env_key=env_key, | ||
| region=region, | ||
| ttl_seconds=ttl_seconds, | ||
| env_variables=env_variables, | ||
| image_type=sdk_image_type, | ||
| data_key=data_key_spec, | ||
| ) | ||
| break # Success | ||
| except Exception as e: | ||
| error_msg = str(e) | ||
| # Retry on transient errors (health check failures, timeouts, etc.) | ||
| is_transient = any( | ||
| x in error_msg.lower() | ||
| for x in ["health check", "timeout", "connection", "temporarily"] | ||
| ) | ||
| if attempt < max_retries - 1 and is_transient: | ||
| delay = retry_base_delay * (2**attempt) | ||
| _logger.warning( | ||
| f"[env={env_key}] AsyncFleet.make() failed (attempt {attempt + 1}/{max_retries}): {e}. " | ||
| f"Retrying in {delay:.1f}s..." | ||
| ) | ||
| fleet_warning( | ||
| "fleet_make_retry", | ||
| attempt=attempt + 1, | ||
| max_retries=max_retries, | ||
| error_type=type(e).__name__, | ||
| error_message=str(e), | ||
| retry_delay_s=delay, | ||
| ) | ||
| await asyncio.sleep(delay) | ||
| else: | ||
| _logger.error( | ||
| f"[env={env_key}] AsyncFleet.make() failed after {attempt + 1} attempt(s): {e}" | ||
| ) | ||
| fleet_error( | ||
| "fleet_make_failed", | ||
| attempt=attempt + 1, | ||
| max_retries=max_retries, | ||
| error_type=type(e).__name__, | ||
| error_message=str(e), | ||
| ) | ||
| raise | ||
|
|
||
| elapsed = time.time() - start | ||
| instance_id = getattr(env, "instance_id", "unknown") | ||
| _logger.info(f"Fleet instance ready (async) in {elapsed:.1f}s: {instance_id}") | ||
| fleet_info( | ||
| "fleet_provisioning_completed", | ||
| provisioning_time_s=round(elapsed, 1), | ||
| instance_id=instance_id, | ||
| ) | ||
|
|
||
| root = env.urls.root | ||
| # Pick MCP endpoint based on modality: | ||
| # - computer_use (image_type="mcp"): aggregator on port 8081 (has computer tool + API tools) | ||
| # - tool_use: per-env MCP server on port 3003 (API tools only) | ||
| if image_type == "mcp": | ||
| mcp_urls = (f"{root}api/v1/mcp",) | ||
| else: | ||
| mcp_urls = (f"{root}mcp",) | ||
|
|
||
| orch = cls( | ||
| base_url=env.urls.manager.api, | ||
|
|
@@ -185,4 +337,12 @@ def close(self) -> None: | |
| self._fleet_env.close() | ||
| super().close() | ||
|
|
||
| async def close_async(self) -> None: | ||
| """Async close — runs sync Fleet close in a thread to avoid blocking the event loop.""" | ||
| if self._fleet_env: | ||
| await asyncio.to_thread(self._fleet_env.close) | ||
| super().close() | ||
|
|
||
| async def reset_async(self) -> "StepResult": | ||
| """Async reset — runs sync HTTP reset in a thread to avoid blocking the event loop.""" | ||
| return await asyncio.to_thread(self.reset) | ||


Uh oh!
There was an error while loading. Please reload this page.