From 77835582fd59979b528c1666288afdb020121823 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 09:48:04 +0530 Subject: [PATCH 01/11] Fix coding_env API signature compatibility Accept optional reset/step parameters in PythonCodeActEnv and add tests for episode_id and timeout_s handling. --- envs/coding_env/server/python_codeact_env.py | 20 ++++++++++++++++--- tests/envs/test_python_codeact_reset.py | 21 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index dbfc39e6a..edeb4441f 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,6 +12,7 @@ """ import uuid +from typing import Any, Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -50,7 +51,12 @@ def __init__( self._executor = PyExecutor() self._state = CodeState() - def reset(self) -> Observation: + def reset( + self, + seed: Optional[int] = None, + episode_id: Optional[str] = None, + **kwargs: Any, + ) -> Observation: """ Reset environment and start fresh execution session. @@ -58,7 +64,10 @@ def reset(self) -> Observation: Initial observation with empty stdout/stderr and exit_code=0 """ # Initialize fresh state - self._state = CodeState(episode_id=str(uuid.uuid4()), step_count=0) + self._state = CodeState( + episode_id=episode_id or str(uuid.uuid4()), + step_count=0, + ) # Add last_exit_code to state self._state.last_exit_code = 0 @@ -77,7 +86,12 @@ def reset(self) -> Observation: return self._apply_transform(observation) - def step(self, action: Action) -> Observation: + def step( + self, + action: Action, + timeout_s: Optional[float] = None, + **kwargs: Any, + ) -> Observation: """ Execute code action and return observation. diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index b4d8b59f1..bd0a767c9 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -166,3 +166,24 @@ def test_reset_changes_episode_id(): # Episode IDs should be different assert episode_id_1 != episode_id_2 + + +def test_reset_accepts_episode_id_override(): + """Test that reset() accepts an explicit episode_id.""" + env = PythonCodeActEnv() + + env.reset(episode_id="episode-123") + + assert env.state.episode_id == "episode-123" + assert env.state.step_count == 0 + + +def test_step_accepts_timeout_parameter(): + """Test that step() accepts timeout_s without raising TypeError.""" + env = PythonCodeActEnv() + env.reset() + + obs = env.step(CodeAction(code="print('ok')"), timeout_s=0.5) + + assert obs.exit_code == 0 + assert "ok" in obs.stdout From 11e43fe35c8b42a428b7e2d2ba0ed9f0fdbdbdc6 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 10:06:56 +0530 Subject: [PATCH 02/11] Fix coding_env safety reward false positives with AST detection --- envs/coding_env/server/__init__.py | 16 +++++- envs/coding_env/server/transforms.py | 61 ++++++++++++++++------ tests/envs/test_coding_safety_transform.py | 57 ++++++++++++++++++++ 3 files changed, 115 insertions(+), 19 deletions(-) create mode 100644 tests/envs/test_coding_safety_transform.py diff --git a/envs/coding_env/server/__init__.py b/envs/coding_env/server/__init__.py index dab6b748a..41d01bba7 100644 --- a/envs/coding_env/server/__init__.py +++ b/envs/coding_env/server/__init__.py @@ -4,8 +4,20 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. -"""Coding environment server components.""" +"""Coding environment server components. -from .python_codeact_env import PythonCodeActEnv +Keep imports lazy so utility modules (for example transforms) remain importable +without pulling optional runtime dependencies like smolagents. +""" + +from typing import Any __all__ = ["PythonCodeActEnv"] + + +def __getattr__(name: str) -> Any: + if name == "PythonCodeActEnv": + from .python_codeact_env import PythonCodeActEnv + + return PythonCodeActEnv + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index fc92e89ba..101f74cfe 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -7,7 +7,6 @@ """Transforms specific to coding environments.""" import ast -import re from openenv.core.env_server.base_transforms import CompositeTransform from openenv.core.env_server.interfaces import Transform @@ -21,14 +20,44 @@ class CodeSafetyTransform(Transform): def __init__(self, penalty: float = -1.0): self.penalty = penalty - self.dangerous_patterns = [ - r"import\s+os", - r"import\s+subprocess", - r"eval\(", - r"exec\(", - r"__import__", - r"open\(", - ] + + def _detect_violation(self, code: str) -> str | None: + """ + Detect dangerous operations using AST analysis. + + AST-based detection avoids false positives from harmless string literals + (e.g. ``print("import os")``) or similarly named user functions + (e.g. ``myopen()``). + """ + try: + tree = ast.parse(code) + except SyntaxError: + # Syntax quality is handled by CodeQualityTransform. + return None + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + top_level_module = alias.name.split(".", 1)[0] + if top_level_module in {"os", "subprocess"}: + return f"import {top_level_module}" + + if isinstance(node, ast.ImportFrom) and node.module: + top_level_module = node.module.split(".", 1)[0] + if top_level_module in {"os", "subprocess"}: + return f"import {top_level_module}" + + if isinstance(node, ast.Call): + called_name: str | None = None + if isinstance(node.func, ast.Name): + called_name = node.func.id + elif isinstance(node.func, ast.Attribute): + called_name = node.func.attr + + if called_name in {"eval", "exec", "open", "__import__"}: + return called_name + + return None def __call__(self, observation: Observation) -> Observation: if not isinstance(observation, CodeObservation): @@ -36,14 +65,12 @@ def __call__(self, observation: Observation) -> Observation: if "last_code" in observation.metadata: code = observation.metadata["last_code"] - for pattern in self.dangerous_patterns: - if re.search(pattern, code): - observation.reward = self.penalty - observation.metadata["safety_violation"] = pattern - break - else: - if observation.reward is None: - observation.reward = 0.0 + violation = self._detect_violation(code) + if violation is not None: + observation.reward = self.penalty + observation.metadata["safety_violation"] = violation + elif observation.reward is None: + observation.reward = 0.0 return observation diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py new file mode 100644 index 000000000..9b4768cff --- /dev/null +++ b/tests/envs/test_coding_safety_transform.py @@ -0,0 +1,57 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Tests for coding_env safety transform false-positive handling.""" + +import os +import sys +from pathlib import Path + +# Add the project root and src to the path +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) + +from envs.coding_env.models import CodeObservation +from envs.coding_env.server.transforms import CodeSafetyTransform + + +def _apply_safety_transform(code: str) -> CodeObservation: + transform = CodeSafetyTransform() + observation = CodeObservation( + stdout="", + stderr="", + exit_code=0, + metadata={"last_code": code}, + ) + transformed = transform(observation) + assert isinstance(transformed, CodeObservation) + return transformed + + +def test_blocks_real_dangerous_import(): + observation = _apply_safety_transform("import os\nprint('x')") + assert observation.reward == -1.0 + assert "safety_violation" in observation.metadata + + +def test_blocks_builtin_open_call(): + observation = _apply_safety_transform("with open('f.txt') as f:\n data = f.read()") + assert observation.reward == -1.0 + assert "safety_violation" in observation.metadata + + +def test_does_not_flag_string_literal_with_dangerous_text(): + observation = _apply_safety_transform("print('import os')") + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata + + +def test_does_not_flag_user_defined_myopen_function(): + observation = _apply_safety_transform( + "def myopen():\n return 1\nresult = myopen()" + ) + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata From 7f989f7686f88d787ec2d8d19dc6e20346c28de1 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 2 May 2026 11:01:16 +0530 Subject: [PATCH 03/11] Address Greptile findings for coding_env safety and step signature --- envs/coding_env/server/python_codeact_env.py | 2 -- envs/coding_env/server/transforms.py | 8 ++------ tests/envs/test_coding_safety_transform.py | 12 ++++++++++++ tests/envs/test_python_codeact_reset.py | 11 ----------- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index edeb4441f..061642ed2 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -89,8 +89,6 @@ def reset( def step( self, action: Action, - timeout_s: Optional[float] = None, - **kwargs: Any, ) -> Observation: """ Execute code action and return observation. diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 101f74cfe..a47b80ad8 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -48,14 +48,10 @@ def _detect_violation(self, code: str) -> str | None: return f"import {top_level_module}" if isinstance(node, ast.Call): - called_name: str | None = None if isinstance(node.func, ast.Name): called_name = node.func.id - elif isinstance(node.func, ast.Attribute): - called_name = node.func.attr - - if called_name in {"eval", "exec", "open", "__import__"}: - return called_name + if called_name in {"eval", "exec", "open", "__import__"}: + return called_name return None diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 9b4768cff..4f59193a1 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -55,3 +55,15 @@ def test_does_not_flag_user_defined_myopen_function(): ) assert observation.reward == 0.0 assert "safety_violation" not in observation.metadata + + +def test_does_not_flag_attribute_method_named_exec(): + observation = _apply_safety_transform( + "class DB:\n" + " def exec(self, sql):\n" + " return sql\n" + "db = DB()\n" + "result = db.exec('SELECT 1')" + ) + assert observation.reward == 0.0 + assert "safety_violation" not in observation.metadata diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index bd0a767c9..e6c6ed113 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -176,14 +176,3 @@ def test_reset_accepts_episode_id_override(): assert env.state.episode_id == "episode-123" assert env.state.step_count == 0 - - -def test_step_accepts_timeout_parameter(): - """Test that step() accepts timeout_s without raising TypeError.""" - env = PythonCodeActEnv() - env.reset() - - obs = env.step(CodeAction(code="print('ok')"), timeout_s=0.5) - - assert obs.exit_code == 0 - assert "ok" in obs.stdout From fa2e415c1a4cbc1be845e7500b47a51e7ab30696 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 01:24:31 +0530 Subject: [PATCH 04/11] Address coding env API review issues --- envs/coding_env/server/app.py | 3 ++- envs/coding_env/server/python_codeact_env.py | 7 +++++++ envs/coding_env/server/transforms.py | 4 +++- tests/envs/test_coding_safety_transform.py | 16 +++++----------- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/envs/coding_env/server/app.py b/envs/coding_env/server/app.py index 4c712916b..2271b69de 100644 --- a/envs/coding_env/server/app.py +++ b/envs/coding_env/server/app.py @@ -21,9 +21,10 @@ python -m envs.coding_env.server.app """ +from openenv.core.env_server import create_app + from coding_env.models import CodeAction, CodeObservation from coding_env.server.python_codeact_env import PythonCodeActEnv -from openenv.core.env_server import create_app # Create the app with web interface and README integration # Pass the class (factory) instead of an instance for WebSocket session support diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 061642ed2..2ebac30d7 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -89,12 +89,17 @@ def reset( def step( self, action: Action, + timeout_s: Optional[float] = None, + **kwargs: Any, ) -> Observation: """ Execute code action and return observation. Args: action: CodeAction containing the code to execute + timeout_s: Optional timeout accepted for Environment API compatibility. + PyExecutor does not currently expose per-call timeout control. + **kwargs: Additional step parameters accepted for API compatibility. Returns: CodeObservation with execution results (stdout, stderr, exit_code) @@ -102,6 +107,8 @@ def step( Raises: ValueError: If action is not a CodeAction instance """ + del timeout_s, kwargs + if not isinstance(action, CodeAction): raise ValueError(f"Expected CodeAction, got {type(action)}") diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index a47b80ad8..f5a856012 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -32,7 +32,9 @@ def _detect_violation(self, code: str) -> str | None: try: tree = ast.parse(code) except SyntaxError: - # Syntax quality is handled by CodeQualityTransform. + # Intentional trade-off: once the code is syntactically invalid, + # this AST-only safety pass cannot reliably inspect partial code. + # CodeQualityTransform applies the syntax penalty instead. return None for node in ast.walk(tree): diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 4f59193a1..cf76503b4 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -6,16 +6,8 @@ """Tests for coding_env safety transform false-positive handling.""" -import os -import sys -from pathlib import Path - -# Add the project root and src to the path -sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from envs.coding_env.models import CodeObservation -from envs.coding_env.server.transforms import CodeSafetyTransform +from coding_env.models import CodeObservation +from coding_env.server.transforms import CodeSafetyTransform def _apply_safety_transform(code: str) -> CodeObservation: @@ -38,7 +30,9 @@ def test_blocks_real_dangerous_import(): def test_blocks_builtin_open_call(): - observation = _apply_safety_transform("with open('f.txt') as f:\n data = f.read()") + observation = _apply_safety_transform( + "with open('f.txt') as f:\n data = f.read()" + ) assert observation.reward == -1.0 assert "safety_violation" in observation.metadata From 3efb5ec6ea1c00839858a1bca70c9c48ce8f8a90 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 01:38:09 +0530 Subject: [PATCH 05/11] Tighten coding env API signatures --- envs/coding_env/server/python_codeact_env.py | 10 +--------- tests/envs/test_coding_safety_transform.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 2ebac30d7..6bef79bd0 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,7 +12,7 @@ """ import uuid -from typing import Any, Optional +from typing import Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -55,7 +55,6 @@ def reset( self, seed: Optional[int] = None, episode_id: Optional[str] = None, - **kwargs: Any, ) -> Observation: """ Reset environment and start fresh execution session. @@ -89,17 +88,12 @@ def reset( def step( self, action: Action, - timeout_s: Optional[float] = None, - **kwargs: Any, ) -> Observation: """ Execute code action and return observation. Args: action: CodeAction containing the code to execute - timeout_s: Optional timeout accepted for Environment API compatibility. - PyExecutor does not currently expose per-call timeout control. - **kwargs: Additional step parameters accepted for API compatibility. Returns: CodeObservation with execution results (stdout, stderr, exit_code) @@ -107,8 +101,6 @@ def step( Raises: ValueError: If action is not a CodeAction instance """ - del timeout_s, kwargs - if not isinstance(action, CodeAction): raise ValueError(f"Expected CodeAction, got {type(action)}") diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index cf76503b4..323b61bb2 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -37,6 +37,24 @@ def test_blocks_builtin_open_call(): assert "safety_violation" in observation.metadata +def test_blocks_builtin_eval_call(): + observation = _apply_safety_transform("result = eval('1 + 1')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "eval" + + +def test_blocks_builtin_exec_call(): + observation = _apply_safety_transform("exec('x = 1')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "exec" + + +def test_blocks_builtin_import_call(): + observation = _apply_safety_transform("__import__('os')") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "__import__" + + def test_does_not_flag_string_literal_with_dangerous_text(): observation = _apply_safety_transform("print('import os')") assert observation.reward == 0.0 From d378b68335326fce087afaaa7c2832465a84d04e Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 07:46:48 +0530 Subject: [PATCH 06/11] Clarify coding env reset compatibility --- envs/coding_env/server/python_codeact_env.py | 12 +++++++++++- envs/coding_env/server/transforms.py | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 6bef79bd0..2804d6a65 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -12,7 +12,7 @@ """ import uuid -from typing import Optional +from typing import Any, Optional from openenv.core.env_server.interfaces import Action, Environment, Observation @@ -55,13 +55,23 @@ def reset( self, seed: Optional[int] = None, episode_id: Optional[str] = None, + **kwargs: Any, ) -> Observation: """ Reset environment and start fresh execution session. + Args: + seed: Accepted for API compatibility. This deterministic executor + has no random state to seed. + episode_id: Optional episode identifier override. + **kwargs: Forward-compatible reset parameters accepted by the base + Environment API but unused by this environment. + Returns: Initial observation with empty stdout/stderr and exit_code=0 """ + del seed, kwargs + # Initialize fresh state self._state = CodeState( episode_id=episode_id or str(uuid.uuid4()), diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index f5a856012..371d3c0d3 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -16,7 +16,12 @@ class CodeSafetyTransform(Transform): - """Evaluates code safety and assigns penalties for dangerous patterns.""" + """ + Assign penalties for obviously unsafe coding patterns. + + This is a reward heuristic, not a security sandbox. Container isolation is + the security boundary; this transform only shapes rewards for common cases. + """ def __init__(self, penalty: float = -1.0): self.penalty = penalty From d180b8c26ee4108fb1b2a1e6958314c87911e7b9 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 07:57:37 +0530 Subject: [PATCH 07/11] Harden coding safety AST parsing --- envs/coding_env/server/transforms.py | 9 +++++---- tests/envs/test_coding_safety_transform.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 371d3c0d3..2cc25b7fd 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -36,10 +36,11 @@ def _detect_violation(self, code: str) -> str | None: """ try: tree = ast.parse(code) - except SyntaxError: - # Intentional trade-off: once the code is syntactically invalid, - # this AST-only safety pass cannot reliably inspect partial code. - # CodeQualityTransform applies the syntax penalty instead. + except (SyntaxError, RecursionError, ValueError): + # Intentional trade-off: once the code is syntactically invalid or + # pathologically nested, this AST-only safety pass cannot reliably + # inspect partial code. CodeQualityTransform applies the syntax + # penalty instead. return None for node in ast.walk(tree): diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 323b61bb2..0ded50ac9 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -29,6 +29,24 @@ def test_blocks_real_dangerous_import(): assert "safety_violation" in observation.metadata +def test_blocks_subprocess_import(): + observation = _apply_safety_transform("import subprocess") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import subprocess" + + +def test_blocks_from_subprocess_import(): + observation = _apply_safety_transform("from subprocess import run") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import subprocess" + + +def test_blocks_from_os_path_import(): + observation = _apply_safety_transform("from os.path import join") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import os" + + def test_blocks_builtin_open_call(): observation = _apply_safety_transform( "with open('f.txt') as f:\n data = f.read()" From b4b304830c1acf375af25fa4d6eebe961a129d37 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Thu, 14 May 2026 08:12:57 +0530 Subject: [PATCH 08/11] Harden coding env quality transform --- envs/coding_env/server/python_codeact_env.py | 2 +- envs/coding_env/server/transforms.py | 2 +- tests/envs/test_coding_safety_transform.py | 22 +++++++++++++++++++- tests/envs/test_python_codeact_reset.py | 10 +++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 2804d6a65..043838096 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -74,7 +74,7 @@ def reset( # Initialize fresh state self._state = CodeState( - episode_id=episode_id or str(uuid.uuid4()), + episode_id=episode_id if episode_id is not None else str(uuid.uuid4()), step_count=0, ) # Add last_exit_code to state diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 2cc25b7fd..5baed87ce 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -108,7 +108,7 @@ def __call__(self, observation: Observation) -> Observation: # Check syntax (redundant but useful for quality assessment) try: ast.parse(code) - except SyntaxError: + except (SyntaxError, RecursionError, ValueError): quality_score += self.syntax_penalty # Add to existing reward diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 0ded50ac9..9a0986f35 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -7,7 +7,7 @@ """Tests for coding_env safety transform false-positive handling.""" from coding_env.models import CodeObservation -from coding_env.server.transforms import CodeSafetyTransform +from coding_env.server.transforms import CodeQualityTransform, CodeSafetyTransform def _apply_safety_transform(code: str) -> CodeObservation: @@ -97,3 +97,23 @@ def test_does_not_flag_attribute_method_named_exec(): ) assert observation.reward == 0.0 assert "safety_violation" not in observation.metadata + + +def test_quality_transform_handles_ast_recursion_error(monkeypatch): + def raise_recursion_error(_code: str): + raise RecursionError("pathologically nested code") + + monkeypatch.setattr("coding_env.server.transforms.ast.parse", raise_recursion_error) + + transform = CodeQualityTransform(concise_bonus=0.0, syntax_penalty=-0.2) + observation = CodeObservation( + stdout="", + stderr="", + exit_code=0, + metadata={"last_code": "x = 1"}, + ) + + transformed = transform(observation) + + assert isinstance(transformed, CodeObservation) + assert transformed.reward == -0.2 diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index e6c6ed113..55bd9c03b 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -176,3 +176,13 @@ def test_reset_accepts_episode_id_override(): assert env.state.episode_id == "episode-123" assert env.state.step_count == 0 + + +def test_reset_preserves_empty_episode_id_override(): + """Test that reset() preserves any explicit non-None episode_id.""" + env = PythonCodeActEnv() + + env.reset(episode_id="") + + assert env.state.episode_id == "" + assert env.state.step_count == 0 From 8dc4a04b03a9f698de181df8563e43675c9c0571 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sat, 13 Jun 2026 22:12:00 +0530 Subject: [PATCH 09/11] Align coding env step signature --- envs/coding_env/server/python_codeact_env.py | 8 ++++++++ tests/envs/test_coding_safety_transform.py | 6 ++++++ tests/envs/test_python_codeact_reset.py | 20 ++++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index 043838096..eaa727152 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -98,12 +98,18 @@ def reset( def step( self, action: Action, + timeout_s: Optional[float] = None, + **kwargs: Any, ) -> Observation: """ Execute code action and return observation. Args: action: CodeAction containing the code to execute + timeout_s: Accepted for Environment API compatibility. PyExecutor + does not currently expose per-call timeout control. + **kwargs: Forward-compatible step parameters accepted by the base + Environment API but unused by this environment. Returns: CodeObservation with execution results (stdout, stderr, exit_code) @@ -111,6 +117,8 @@ def step( Raises: ValueError: If action is not a CodeAction instance """ + del timeout_s, kwargs + if not isinstance(action, CodeAction): raise ValueError(f"Expected CodeAction, got {type(action)}") diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 9a0986f35..c222da9cc 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -29,6 +29,12 @@ def test_blocks_real_dangerous_import(): assert "safety_violation" in observation.metadata +def test_blocks_import_with_alias(): + observation = _apply_safety_transform("import os as operating_system") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import os" + + def test_blocks_subprocess_import(): observation = _apply_safety_transform("import subprocess") assert observation.reward == -1.0 diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index 55bd9c03b..801cd46dc 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -178,6 +178,16 @@ def test_reset_accepts_episode_id_override(): assert env.state.step_count == 0 +def test_reset_accepts_seed_parameter(): + """Test that reset() accepts a seed for API compatibility.""" + env = PythonCodeActEnv() + + obs = env.reset(seed=42) + + assert obs.exit_code == 0 + assert env.state.step_count == 0 + + def test_reset_preserves_empty_episode_id_override(): """Test that reset() preserves any explicit non-None episode_id.""" env = PythonCodeActEnv() @@ -186,3 +196,13 @@ def test_reset_preserves_empty_episode_id_override(): assert env.state.episode_id == "" assert env.state.step_count == 0 + + +def test_step_accepts_timeout_parameter(): + """Test that step() accepts timeout_s for API compatibility.""" + env = PythonCodeActEnv() + env.reset() + + obs = env.step(CodeAction(code="print('ok')"), timeout_s=30.0) + + assert obs.exit_code == 0 From 0458ee20376d07ac46163d8436e852db4e589a73 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Sun, 14 Jun 2026 07:26:45 +0530 Subject: [PATCH 10/11] Close coding env safety review gaps --- envs/coding_env/server/__init__.py | 9 +++++- envs/coding_env/server/python_codeact_env.py | 2 +- envs/coding_env/server/transforms.py | 33 +++++++++++++++----- tests/envs/test_coding_safety_transform.py | 16 +++++++++- tests/envs/test_python_codeact_reset.py | 6 ++-- 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/envs/coding_env/server/__init__.py b/envs/coding_env/server/__init__.py index 41d01bba7..33f7e7894 100644 --- a/envs/coding_env/server/__init__.py +++ b/envs/coding_env/server/__init__.py @@ -10,11 +10,18 @@ without pulling optional runtime dependencies like smolagents. """ -from typing import Any +from typing import Any, TYPE_CHECKING + +if TYPE_CHECKING: + from .python_codeact_env import PythonCodeActEnv __all__ = ["PythonCodeActEnv"] +def __dir__() -> list[str]: + return sorted({*globals(), *__all__}) + + def __getattr__(name: str) -> Any: if name == "PythonCodeActEnv": from .python_codeact_env import PythonCodeActEnv diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index eaa727152..ddebee405 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -74,7 +74,7 @@ def reset( # Initialize fresh state self._state = CodeState( - episode_id=episode_id if episode_id is not None else str(uuid.uuid4()), + episode_id=episode_id or str(uuid.uuid4()), step_count=0, ) # Add last_exit_code to state diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index 5baed87ce..e0519010f 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -7,6 +7,7 @@ """Transforms specific to coding environments.""" import ast +import re from openenv.core.env_server.base_transforms import CompositeTransform from openenv.core.env_server.interfaces import Transform @@ -15,6 +16,10 @@ from ..models import CodeObservation +def _parse_code(code: str) -> ast.AST: + return ast.parse(code) + + class CodeSafetyTransform(Transform): """ Assign penalties for obviously unsafe coding patterns. @@ -25,6 +30,20 @@ class CodeSafetyTransform(Transform): def __init__(self, penalty: float = -1.0): self.penalty = penalty + self._fallback_patterns = [ + (re.compile(r"\bimport\s+os\b"), "import os"), + (re.compile(r"\bimport\s+subprocess\b"), "import subprocess"), + (re.compile(r"\beval\s*\("), "eval"), + (re.compile(r"\bexec\s*\("), "exec"), + (re.compile(r"\b__import__\s*\("), "__import__"), + (re.compile(r"\bopen\s*\("), "open"), + ] + + def _detect_text_violation(self, code: str) -> str | None: + for pattern, violation in self._fallback_patterns: + if pattern.search(code): + return violation + return None def _detect_violation(self, code: str) -> str | None: """ @@ -35,13 +54,11 @@ def _detect_violation(self, code: str) -> str | None: (e.g. ``myopen()``). """ try: - tree = ast.parse(code) + tree = _parse_code(code) except (SyntaxError, RecursionError, ValueError): - # Intentional trade-off: once the code is syntactically invalid or - # pathologically nested, this AST-only safety pass cannot reliably - # inspect partial code. CodeQualityTransform applies the syntax - # penalty instead. - return None + # Fall back to the previous raw-text heuristic when AST parsing + # cannot inspect malformed or pathologically nested code. + return self._detect_text_violation(code) for node in ast.walk(tree): if isinstance(node, ast.Import): @@ -60,6 +77,8 @@ def _detect_violation(self, code: str) -> str | None: called_name = node.func.id if called_name in {"eval", "exec", "open", "__import__"}: return called_name + if isinstance(node.func, ast.Attribute) and node.func.attr == "open": + return "open" return None @@ -107,7 +126,7 @@ def __call__(self, observation: Observation) -> Observation: # Check syntax (redundant but useful for quality assessment) try: - ast.parse(code) + _parse_code(code) except (SyntaxError, RecursionError, ValueError): quality_score += self.syntax_penalty diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index c222da9cc..68b19254c 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -61,6 +61,18 @@ def test_blocks_builtin_open_call(): assert "safety_violation" in observation.metadata +def test_blocks_attribute_open_call(): + observation = _apply_safety_transform("Path('f.txt').open()") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "open" + + +def test_blocks_raw_text_violation_when_parse_fails(): + observation = _apply_safety_transform("import os\n\x00") + assert observation.reward == -1.0 + assert observation.metadata["safety_violation"] == "import os" + + def test_blocks_builtin_eval_call(): observation = _apply_safety_transform("result = eval('1 + 1')") assert observation.reward == -1.0 @@ -109,7 +121,9 @@ def test_quality_transform_handles_ast_recursion_error(monkeypatch): def raise_recursion_error(_code: str): raise RecursionError("pathologically nested code") - monkeypatch.setattr("coding_env.server.transforms.ast.parse", raise_recursion_error) + monkeypatch.setattr( + "coding_env.server.transforms._parse_code", raise_recursion_error + ) transform = CodeQualityTransform(concise_bonus=0.0, syntax_penalty=-0.2) observation = CodeObservation( diff --git a/tests/envs/test_python_codeact_reset.py b/tests/envs/test_python_codeact_reset.py index 801cd46dc..a7336a9e9 100644 --- a/tests/envs/test_python_codeact_reset.py +++ b/tests/envs/test_python_codeact_reset.py @@ -188,13 +188,13 @@ def test_reset_accepts_seed_parameter(): assert env.state.step_count == 0 -def test_reset_preserves_empty_episode_id_override(): - """Test that reset() preserves any explicit non-None episode_id.""" +def test_reset_replaces_empty_episode_id_override(): + """Test that reset() replaces an empty episode_id with a generated ID.""" env = PythonCodeActEnv() env.reset(episode_id="") - assert env.state.episode_id == "" + assert env.state.episode_id assert env.state.step_count == 0 From 8afc70728a78cfc72c63cbed1e1f83bf6df7b364 Mon Sep 17 00:00:00 2001 From: abhinavgautam01 Date: Mon, 15 Jun 2026 13:51:40 +0530 Subject: [PATCH 11/11] Address coding env review cleanup --- envs/coding_env/server/python_codeact_env.py | 3 ++- envs/coding_env/server/transforms.py | 9 +++------ tests/envs/test_coding_safety_transform.py | 11 +++++------ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/envs/coding_env/server/python_codeact_env.py b/envs/coding_env/server/python_codeact_env.py index ddebee405..aa96bfa15 100644 --- a/envs/coding_env/server/python_codeact_env.py +++ b/envs/coding_env/server/python_codeact_env.py @@ -63,7 +63,8 @@ def reset( Args: seed: Accepted for API compatibility. This deterministic executor has no random state to seed. - episode_id: Optional episode identifier override. + episode_id: Optional episode identifier override. If omitted or + empty, a new episode ID is generated. **kwargs: Forward-compatible reset parameters accepted by the base Environment API but unused by this environment. diff --git a/envs/coding_env/server/transforms.py b/envs/coding_env/server/transforms.py index e0519010f..a03a77cff 100644 --- a/envs/coding_env/server/transforms.py +++ b/envs/coding_env/server/transforms.py @@ -16,10 +16,6 @@ from ..models import CodeObservation -def _parse_code(code: str) -> ast.AST: - return ast.parse(code) - - class CodeSafetyTransform(Transform): """ Assign penalties for obviously unsafe coding patterns. @@ -37,6 +33,7 @@ def __init__(self, penalty: float = -1.0): (re.compile(r"\bexec\s*\("), "exec"), (re.compile(r"\b__import__\s*\("), "__import__"), (re.compile(r"\bopen\s*\("), "open"), + (re.compile(r"\.open\s*\("), "open"), ] def _detect_text_violation(self, code: str) -> str | None: @@ -54,7 +51,7 @@ def _detect_violation(self, code: str) -> str | None: (e.g. ``myopen()``). """ try: - tree = _parse_code(code) + tree = ast.parse(code) except (SyntaxError, RecursionError, ValueError): # Fall back to the previous raw-text heuristic when AST parsing # cannot inspect malformed or pathologically nested code. @@ -126,7 +123,7 @@ def __call__(self, observation: Observation) -> Observation: # Check syntax (redundant but useful for quality assessment) try: - _parse_code(code) + ast.parse(code) except (SyntaxError, RecursionError, ValueError): quality_score += self.syntax_penalty diff --git a/tests/envs/test_coding_safety_transform.py b/tests/envs/test_coding_safety_transform.py index 68b19254c..7e9429eda 100644 --- a/tests/envs/test_coding_safety_transform.py +++ b/tests/envs/test_coding_safety_transform.py @@ -6,6 +6,8 @@ """Tests for coding_env safety transform false-positive handling.""" +from unittest.mock import patch + from coding_env.models import CodeObservation from coding_env.server.transforms import CodeQualityTransform, CodeSafetyTransform @@ -117,14 +119,10 @@ def test_does_not_flag_attribute_method_named_exec(): assert "safety_violation" not in observation.metadata -def test_quality_transform_handles_ast_recursion_error(monkeypatch): +def test_quality_transform_handles_ast_recursion_error(): def raise_recursion_error(_code: str): raise RecursionError("pathologically nested code") - monkeypatch.setattr( - "coding_env.server.transforms._parse_code", raise_recursion_error - ) - transform = CodeQualityTransform(concise_bonus=0.0, syntax_penalty=-0.2) observation = CodeObservation( stdout="", @@ -133,7 +131,8 @@ def raise_recursion_error(_code: str): metadata={"last_code": "x = 1"}, ) - transformed = transform(observation) + with patch("coding_env.server.transforms.ast.parse", raise_recursion_error): + transformed = transform(observation) assert isinstance(transformed, CodeObservation) assert transformed.reward == -0.2