diff --git a/README.md b/README.md
index c38824759..41d731dfc 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,12 @@ pip install -e .
 # Or using uv (faster)
 uv pip install -e .
 
+# For development with testing dependencies (pytest, etc.)
+pip install -e ".[dev]"
+
+# Or using uv
+uv pip install -e ".[dev]"
+
 # Run server locally without Docker
 uv run server --host 0.0.0.0 --port 8000
 ```
diff --git a/examples/local_coding_env.py b/examples/local_coding_env.py
index e88dcb352..6509d7caf 100644
--- a/examples/local_coding_env.py
+++ b/examples/local_coding_env.py
@@ -67,6 +67,7 @@ def main():
             print(f"   {i}. Code: {code.replace(chr(10), '\\n')[:50]}...")
             print(f"      → stdout: {result.observation.stdout.strip()}")
             print(f"      → exit_code: {result.observation.exit_code}")
+            print(f"      → reward: {result.reward}")
             if result.observation.stderr:
                 print(f"      → stderr: {result.observation.stderr}")
 
@@ -84,6 +85,8 @@ def main():
             print(f"   {i}. {description}")
             print(f"      Code: {code.replace(chr(10), '\\n')[:40]}...")
             print(f"      → exit_code: {result.observation.exit_code}")
+            print(f"      → reward: {result.reward}")
+
             if result.observation.stderr:
                 # Truncate long error messages
                 error_msg = result.observation.stderr[:100]
@@ -116,6 +119,7 @@ def main():
     except Exception as e:
         print(f"\n❌ Test failed: {e}")
         import traceback
+
         traceback.print_exc()
         return False
 
diff --git a/pyproject.toml b/pyproject.toml
index 37d7400a2..dd943be25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openenv"
-version = "0.1.1"
+version = "0.1.2"
 description = "A unified framework for reinforcement learning environments"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -26,6 +26,11 @@ dependencies = [
     "tomli-w>=1.2.0"
 ]
 
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+]
+
 [project.scripts]
 openenv = "openenv_cli.__main__:main"
 
diff --git a/src/core/containers/runtime/providers.py b/src/core/containers/runtime/providers.py
index a8022ddca..529710899 100644
--- a/src/core/containers/runtime/providers.py
+++ b/src/core/containers/runtime/providers.py
@@ -192,23 +192,42 @@ def stop_container(self) -> None:
         import subprocess
 
         try:
-            # Stop container
+            # Try graceful stop first (Docker waits 5 seconds before SIGKILL)
+            # Subprocess timeout is 15 seconds to allow Docker's grace period
             subprocess.run(
-                ["docker", "stop", self._container_id],
+                ["docker", "stop", "--time=5", self._container_id],
                 capture_output=True,
                 check=True,
-                timeout=10,
+                timeout=15,
             )
+        except subprocess.TimeoutExpired:
+            # Graceful stop timed out, force kill the container
+            print(f"Warning: Container {self._container_id} did not stop gracefully, forcing kill...")
+            try:
+                subprocess.run(
+                    ["docker", "kill", self._container_id],
+                    capture_output=True,
+                    check=True,
+                    timeout=5,
+                )
+            except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+                # Container might already be stopped
+                pass
+        except subprocess.CalledProcessError:
+            # Container might already be stopped
+            pass
 
-            # Remove container
+        # Always try to remove the container
+        try:
             subprocess.run(
-                ["docker", "rm", self._container_id],
+                ["docker", "rm", "-f", self._container_id],
                 capture_output=True,
                 check=True,
                 timeout=10,
             )
-        except subprocess.CalledProcessError:
-            # Container might already be stopped/removed
+        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+            # Best-effort cleanup: the container may already be gone, or the
+            # forced removal (`docker rm -f`) itself failed or timed out
             pass
         finally:
             self._container_id = None
diff --git a/src/envs/coding_env/README.md b/src/envs/coding_env/README.md
index b99921b8e..aec93982c 100644
--- a/src/envs/coding_env/README.md
+++ b/src/envs/coding_env/README.md
@@ -15,13 +15,112 @@ tags:
 
 A Python code execution environment that runs arbitrary Python code and returns results. Perfect for testing code execution infrastructure and demonstrating environment usage patterns.
 
-## Quick Start
+## Installation & Usage
 
-The simplest way to use the Coding environment is through the `CodingEnv` class:
+The Coding Environment supports two usage modes:
 
+### Mode 1: In-Repository Development (Recommended for Contributors)
+
+Use this mode when developing or contributing to OpenEnv.
+
+**Setup:**
+```bash
+# 1. Clone the repository
+git clone https://github.com/facebookresearch/OpenEnv.git
+cd OpenEnv
+
+# 2. Install in development mode
+pip install -e .
+
+# 3. Build the Docker image (from repo root)
+docker build -t coding-env:latest -f src/envs/coding_env/server/Dockerfile .
+
+# 4. Run the example
+python ./examples/local_coding_env.py
+```
+
+**Code example:**
 ```python
+# Use in-repo import paths
 from envs.coding_env import CodeAction, CodingEnv
 
+try:
+    # Create environment from Docker image
+    coding_env = CodingEnv.from_docker_image("coding-env:latest")
+
+    # Execute Python code
+    result = coding_env.step(CodeAction(code="print('Hello, World!')"))
+    print(f"stdout: {result.observation.stdout.strip()}")
+    print(f"exit_code: {result.observation.exit_code}")
+finally:
+    coding_env.close()
+```
+
+### Mode 2: Standalone Package (For End Users)
+
+Use this mode when using coding_env as a standalone package.
+
+**Setup:**
+```bash
+# 1. Install openenv-core (once available on PyPI)
+pip install openenv-core
+
+# 2. Install coding_env package
+pip install openenv-coding_env
+
+# 3. Use the same Docker image as in-repo mode
+# The client and server communicate over HTTP, so the Docker build mode doesn't matter for testing
+# You can use the in-repo built image: coding-env:latest
+```
+
+**Code example:**
+```python
+# Use standalone import paths
+from coding_env import CodeAction, CodingEnv
+
+try:
+    # Connect to the same Docker image built in in-repo mode
+    coding_env = CodingEnv.from_docker_image("coding-env:latest")
+    result = coding_env.step(CodeAction(code="print('Hello, World!')"))
+    print(f"stdout: {result.observation.stdout.strip()}")
+finally:
+    coding_env.close()
+```
+
+## Quick Start Example
+
+**In-repo mode:**
+```bash
+# From OpenEnv repo root, after pip install -e .
+python ./examples/local_coding_env.py
+```
+
+**Standalone mode:**
+
+For standalone testing, use a separate test script (the repo example uses in-repo imports only):
+
+```python
+# save as test_standalone.py
+from coding_env import CodeAction, CodingEnv
+
+try:
+    # Uses the same Docker image as in-repo mode
+    client = CodingEnv.from_docker_image("coding-env:latest")
+    result = client.step(CodeAction(code="print('Hello from standalone!')"))
+    print(f"stdout: {result.observation.stdout.strip()}")
+finally:
+    client.close()
+```
+
+**Note:** The client (your Python code) and server (Docker container) are independent. The standalone client can connect to the in-repo Docker image because they communicate over HTTP.
+
+### Manual Usage Example
+
+Once set up (either mode), the usage is identical:
+
+```python
+from coding_env import CodeAction, CodingEnv  # or: from envs.coding_env import ...
+
 try:
     # Create environment from Docker image
     coding_env = CodingEnv.from_docker_image("coding-env:latest")
@@ -48,21 +147,12 @@ finally:
     coding_env.close()
 ```
 
-That's it! The `CodingEnv.from_docker_image()` method handles:
+The `CodingEnv.from_docker_image()` method handles:
 - Starting the Docker container
 - Waiting for the server to be ready
 - Connecting to the environment
 - Container cleanup when you call `close()`
 
-## Building the Docker Image
-
-Before using the environment, you need to build the Docker image:
-
-```bash
-# From project root
-docker build -t coding-env:latest -f src/envs/coding_env/server/Dockerfile .
-```
-
 ## Environment Details
 
 ### Action
@@ -88,10 +178,14 @@ docker build -t coding-env:latest -f src/envs/coding_env/server/Dockerfile .
 If you already have a Coding environment server running, you can connect directly:
 
 ```python
+# In-repo mode
 from envs.coding_env import CodingEnv
 
+# OR standalone mode (use this import instead of the one above):
+# from coding_env import CodingEnv
+
 # Connect to existing server
-coding_env = CodingEnv(base_url="<ENV_HTTP_URL_HERE>")
+coding_env = CodingEnv(base_url="http://localhost:8000")
 
 # Use as normal
 result = coding_env.reset()
@@ -100,34 +194,60 @@ result = coding_env.step(CodeAction(code="print('Hello!')"))
 
 Note: When connecting to an existing server, `coding_env.close()` will NOT stop the server.
 
-## Development & Testing
+## Docker Build Options
+
+The Dockerfile supports two build modes:
+
+```bash
+# In-repo build (default) - from OpenEnv repo root
+docker build -t coding-env:latest -f src/envs/coding_env/server/Dockerfile .
+
+# Standalone build - from coding_env package directory (for distribution only)
+docker build -t coding-env:standalone -f server/Dockerfile --build-arg BUILD_MODE=standalone .
+```
+
+**When to use each mode:**
+
+- **In-repo mode (default)**: For development and testing (works with both client modes)
+- **Standalone mode**: Only needed when distributing the Docker image without the full OpenEnv repo
 
-### Running the Full Example
+**Important:** For local testing, the in-repo Docker image works with both in-repo and standalone clients. The client and server communicate over HTTP, so they're independent. The BUILD_MODE distinction is primarily for distribution/packaging purposes.
+
+## Development & Testing
 
-Run the complete example that demonstrates the full workflow:
+### Running Tests
 
 ```bash
-python3 src/envs/coding_env/client/example_usage.py
+# From repo root
+pytest tests/envs/test_python_codeact_reset.py
 ```
 
-This example shows:
-- Creating an environment from a Docker image
-- Resetting and executing code through the environment
-- Automatic cleanup with `close()`
+### Building Packages Locally
+
+```bash
+# Build openenv-core
+cd src
+python -m build -w
+
+# Build coding_env
+cd envs/coding_env
+python -m build -w
+```
 
 ## Project Structure
 
 ```
 coding_env/
-├── README.md              # This file
-├── models.py              # Action, Observation, and State models
-├── client/
-│   ├── coding_env_client.py  # CodingEnv client implementation
-│   └── example_usage.py      # Usage examples
+├── README.md                  # This file
+├── pyproject.toml             # Package configuration
+├── __init__.py                # Package exports
+├── models.py                  # Action, Observation, and State models
+├── client.py                  # CodingEnv client implementation
 └── server/
     ├── python_codeact_env.py  # Core environment logic
+    ├── python_executor.py     # Code execution wrapper
     ├── app.py                 # FastAPI application
     ├── transforms.py          # Observation transforms
-    ├── Dockerfile             # Container image definition
+    ├── Dockerfile             # Container image (dual-mode)
     └── README.md              # Server-specific documentation
 ```
diff --git a/src/envs/coding_env/client.py b/src/envs/coding_env/client.py
index d65c5152e..4b7e40e28 100644
--- a/src/envs/coding_env/client.py
+++ b/src/envs/coding_env/client.py
@@ -13,11 +13,18 @@
 
 from __future__ import annotations
 
-from openenv_core.client_types import StepResult
-
-from openenv_core.http_env_client import HTTPEnvClient
-
-from coding_env.models import CodeAction, CodeObservation, CodeState
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.client_types import StepResult
+    from openenv_core.http_env_client import HTTPEnvClient
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.client_types import StepResult
+    from core.http_env_client import HTTPEnvClient
+
+# Use relative imports for sibling modules - works in both modes
+from .models import CodeAction, CodeObservation, CodeState
 
 
 class CodingEnv(HTTPEnvClient[CodeAction, CodeObservation]):
diff --git a/src/envs/coding_env/models.py b/src/envs/coding_env/models.py
index a92c2560e..6f6330212 100644
--- a/src/envs/coding_env/models.py
+++ b/src/envs/coding_env/models.py
@@ -8,7 +8,13 @@
 
 from dataclasses import dataclass
 
-from openenv_core.env_server.interfaces import Action, Observation, State
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.env_server.types import Action, Observation, State
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.env_server.types import Action, Observation, State
 
 
 @dataclass
diff --git a/src/envs/coding_env/pyproject.toml b/src/envs/coding_env/pyproject.toml
index 06b70f2ba..c35f6f3a5 100644
--- a/src/envs/coding_env/pyproject.toml
+++ b/src/envs/coding_env/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "openenv-coding_env"
-version = "0.1.0"
+version = "0.1.1"
 description = "Coding Environment for OpenEnv"
 requires-python = ">=3.10"
 dependencies = [
diff --git a/src/envs/coding_env/server/Dockerfile b/src/envs/coding_env/server/Dockerfile
index cef367db9..efb5cadf2 100644
--- a/src/envs/coding_env/server/Dockerfile
+++ b/src/envs/coding_env/server/Dockerfile
@@ -1,26 +1,73 @@
-# Base image
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Dockerfile for Coding Environment
+# Supports both in-repo and standalone builds
+#
+# In-repo build (from repo root):
+#   docker build -t coding-env:latest -f src/envs/coding_env/server/Dockerfile .
+#
+# Standalone build (from coding_env directory with openenv-core on PyPI):
+#   docker build -t coding-env:latest -f server/Dockerfile --build-arg BUILD_MODE=standalone .
+
 FROM python:3.11-slim
 
+# Build argument to control mode
+ARG BUILD_MODE=in-repo
+
 # Set working directory
-WORKDIR /app/env
+WORKDIR /app
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y \
     git \
+    curl \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy environment files
-COPY . .
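+# Copy source files into the image
+# In-repo: the contents of src/ (core/, envs/, ...) land directly in /app, not in /app/src
+# NOTE(review): ${BUILD_MODE:+src/} yields src/ for any non-empty BUILD_MODE, so standalone builds copy src/ too - confirm intent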
+COPY ${BUILD_MODE:+src/} ./
 
-# Install Python dependencies
-RUN pip install --no-cache-dir -e .
+# Install dependencies
+RUN if [ "$BUILD_MODE" = "in-repo" ]; then \
+        # In-repo: install core dependencies directly \
+        pip install --no-cache-dir \
+            'fastapi>=0.104.0' \
+            'pydantic>=2.0.0' \
+            'uvicorn[standard]>=0.24.0' \
+            'requests>=2.25.0' \
+            'smolagents>=1.22.0,<2'; \
+    else \
+        # Standalone: install from pyproject.toml (includes openenv-core from PyPI) \
+        pip install --no-cache-dir -e .; \
+    fi
+
+# Convert ARG to ENV so it's available at runtime
+ENV BUILD_MODE=${BUILD_MODE}
+ENV PYTHONUNBUFFERED=1
+ENV ENABLE_WEB_INTERFACE=true
+
+# NOTE(review): container processes do not read /etc/environment, so this has no runtime effect; CMD exports PYTHONPATH itself
+RUN if [ "$BUILD_MODE" = "in-repo" ]; then \
+        echo "export PYTHONPATH=/app/src" >> /etc/environment; \
+    fi
 
 # Expose port
 EXPOSE 8000
 
-# Set environment variables
-ENV PYTHONUNBUFFERED=1
-ENV ENABLE_WEB_INTERFACE=true
+# Health check
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:8000/health || exit 1
 
-# Run the server
-CMD ["python", "-m", "uvicorn", "coding_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+# Run the server with correct module path based on build mode
+# In-repo: envs.coding_env.server.app:app (with PYTHONPATH=/app/src)
+# Standalone: coding_env.server.app:app (with package installed)
+CMD if [ "$BUILD_MODE" = "in-repo" ]; then \
+        export PYTHONPATH=/app/src && uvicorn envs.coding_env.server.app:app --host 0.0.0.0 --port 8000; \
+    else \
+        uvicorn coding_env.server.app:app --host 0.0.0.0 --port 8000; \
+    fi
diff --git a/src/envs/coding_env/server/app.py b/src/envs/coding_env/server/app.py
index 1a5edf7cb..6bfbf962a 100644
--- a/src/envs/coding_env/server/app.py
+++ b/src/envs/coding_env/server/app.py
@@ -21,10 +21,23 @@
     python -m envs.coding_env.server.app
 """
 
-from openenv_core.env_server import create_app
-
-from coding_env.models import CodeAction, CodeObservation
-from coding_env.server.python_codeact_env import PythonCodeActEnv
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.env_server import create_app
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.env_server import create_app
+
+# Try the standalone package path first, then fall back to the in-repo path
+try:
+    # Standalone mode
+    from coding_env.models import CodeAction, CodeObservation
+    from coding_env.server.python_codeact_env import PythonCodeActEnv
+except ImportError:
+    # In-repo mode
+    from envs.coding_env.models import CodeAction, CodeObservation
+    from envs.coding_env.server.python_codeact_env import PythonCodeActEnv
 
 # Create the environment instance
 env = PythonCodeActEnv()
diff --git a/src/envs/coding_env/server/python_codeact_env.py b/src/envs/coding_env/server/python_codeact_env.py
index ecc93d9fe..3e744a5ee 100644
--- a/src/envs/coding_env/server/python_codeact_env.py
+++ b/src/envs/coding_env/server/python_codeact_env.py
@@ -13,11 +13,27 @@
 
 import uuid
 
-from openenv_core.env_server.interfaces import Action, Environment, Observation
-from coding_env.server.python_executor import PyExecutor
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.env_server.interfaces import Action, Environment, Observation
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.env_server.interfaces import Action, Environment, Observation
 
-from coding_env.models import CodeAction, CodeObservation, CodeState
-from .transforms import create_safe_coding_transform
+# Try the standalone package path first, then fall back to the in-repo path
+try:
+    from coding_env.models import CodeAction, CodeObservation, CodeState
+
+    # Standalone mode
+    from coding_env.server.python_executor import PyExecutor
+    from coding_env.server.transforms import create_safe_coding_transform
+except ImportError:
+    from envs.coding_env.models import CodeAction, CodeObservation, CodeState
+
+    # In-repo mode
+    from envs.coding_env.server.python_executor import PyExecutor
+    from envs.coding_env.server.transforms import create_safe_coding_transform
 
 
 class PythonCodeActEnv(Environment):
@@ -105,6 +121,7 @@ def step(self, action: Action) -> Observation:
             stdout=result.stdout,
             stderr=result.stderr,
             exit_code=result.exit_code,
+            metadata={"last_code": action.code},  # Add code to metadata for transforms
         )
 
         return self._apply_transform(observation)
diff --git a/src/envs/coding_env/server/python_executor.py b/src/envs/coding_env/server/python_executor.py
index 17b6ecc13..19024f0da 100644
--- a/src/envs/coding_env/server/python_executor.py
+++ b/src/envs/coding_env/server/python_executor.py
@@ -27,7 +27,13 @@
 
 from smolagents import LocalPythonExecutor
 
-from openenv_core.env_server.types import CodeExecResult
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.env_server.types import CodeExecResult
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.env_server.types import CodeExecResult
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
diff --git a/src/envs/coding_env/server/transforms.py b/src/envs/coding_env/server/transforms.py
index ee5a1c4b0..39d2e5a80 100644
--- a/src/envs/coding_env/server/transforms.py
+++ b/src/envs/coding_env/server/transforms.py
@@ -9,11 +9,25 @@
 import ast
 import re
 
-from openenv_core.env_server.base_transforms import CompositeTransform
-from openenv_core.env_server.interfaces import Transform
-from openenv_core.env_server.types import Observation
-
-from coding_env.models import CodeObservation
+# Support both standalone and in-repo imports
+try:
+    # Standalone imports (when installed from pip)
+    from openenv_core.env_server.base_transforms import CompositeTransform
+    from openenv_core.env_server.interfaces import Transform
+    from openenv_core.env_server.types import Observation
+except ImportError:
+    # In-repo imports (when running from OpenEnv repository)
+    from core.env_server.base_transforms import CompositeTransform
+    from core.env_server.interfaces import Transform
+    from core.env_server.types import Observation
+
+# Try the standalone package path first, then fall back to the in-repo path
+try:
+    # Standalone mode
+    from coding_env.models import CodeObservation
+except ImportError:
+    # In-repo mode
+    from envs.coding_env.models import CodeObservation
 
 
 class CodeSafetyTransform(Transform):
diff --git a/tests/envs/test_python_codeact_rewards.py b/tests/envs/test_python_codeact_rewards.py
new file mode 100644
index 000000000..0a5bd811e
--- /dev/null
+++ b/tests/envs/test_python_codeact_rewards.py
@@ -0,0 +1,270 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test that PythonCodeActEnv properly computes rewards via transform pipeline."""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
+
+from envs.coding_env.models import CodeAction
+from envs.coding_env.server.python_codeact_env import PythonCodeActEnv
+
+
+# ============================================================================
+# Fixtures
+# ============================================================================
+
+
+@pytest.fixture
+def env():
+    """Provides a fresh PythonCodeActEnv for each test."""
+    environment = PythonCodeActEnv()
+    environment.reset()
+    return environment
+
+
+@pytest.fixture
+def env_with_variable(env):
+    """Environment with a variable already defined."""
+    env.step(CodeAction(code="test_var = 42"))
+    return env
+
+
+# ============================================================================
+# Parametrized Tests - Reward Computation
+# ============================================================================
+
+
+@pytest.mark.parametrize(
+    "code,expected_reward,expected_exit_code,description",
+    [
+        # Safe + concise code
+        ("x = 5", 0.1, 0, "safe + concise"),
+        ("print('Hello')", 0.1, 0, "safe + concise print"),
+        ("y = 10 + 5", 0.1, 0, "safe + concise calculation"),
+        # Safe + verbose code (>100 chars, no concise bonus)
+        ("x = " + " + ".join(str(i) for i in range(50)), 0.0, 0, "safe + verbose"),
+        # Dangerous + concise (-1.0 safety + 0.1 concise = -0.9)
+        # NOTE: These actually fail at execution, so exit_code=1
+        ("import os", -0.9, 1, "dangerous + concise"),
+        ("eval('1+1')", -0.9, 1, "dangerous eval"),
+        ("exec('x=1')", -0.9, 1, "dangerous exec"),
+        ("with open('f.txt') as f: pass", -0.9, 1, "dangerous open"),
+        # Dangerous + verbose (-1.0 safety, no concise bonus)
+        ("import os\n" + "x = 1\n" * 50, -1.0, 1, "dangerous + verbose"),
+        # Syntax error + concise (0.0 safe - 0.2 syntax + 0.1 concise = -0.1)
+        ("print('unclosed", -0.1, 1, "syntax error + concise"),
+        # Syntax error + verbose (0.0 safe - 0.2 syntax = -0.2)
+        (
+            "x = " + " + ".join(str(i) for i in range(50)) + "\nprint('unclosed",
+            -0.2,
+            1,
+            "syntax error + verbose",
+        ),
+    ],
+    ids=lambda x: (
+        x if isinstance(x, str) and len(x) < 20 else None
+    ),  # Short string params (code or description) become ID parts; others use pytest's default IDs
+)
+def test_reward_computation(
+    env, code, expected_reward, expected_exit_code, description
+):
+    """Test reward computation for various code patterns.
+
+    Parametrized test covering:
+    - Safe code (concise and verbose)
+    - Dangerous patterns (import os, eval, exec, open)
+    - Syntax errors
+    - Combinations of safety and quality transforms
+
+    Uses pytest.approx() for all float comparisons since rewards are computed
+    via floating point addition in the transform pipeline (see transforms.py).
+    """
+    action = CodeAction(code=code)
+    obs = env.step(action)
+
+    assert obs.reward == pytest.approx(
+        expected_reward, rel=1e-9
+    ), f"{description}: expected reward {expected_reward}, got {obs.reward}"
+    assert (
+        obs.exit_code == expected_exit_code
+    ), f"{description}: expected exit_code {expected_exit_code}, got {obs.exit_code}"
+
+
+# ============================================================================
+# Metadata Tests
+# ============================================================================
+
+
+def test_metadata_contains_last_code(env):
+    """Test that step() includes executed code in observation metadata.
+
+    This is CRITICAL for the transform pipeline to evaluate code and assign rewards.
+    Without metadata["last_code"], transforms cannot access the code and rewards
+    will always be None.
+    """
+    code = "print('Hello, World!')"
+    action = CodeAction(code=code)
+    obs = env.step(action)
+
+    assert (
+        "last_code" in obs.metadata
+    ), "metadata must contain 'last_code' for transform pipeline to evaluate code"
+    assert (
+        obs.metadata["last_code"] == code
+    ), f"metadata['last_code'] should be '{code}', got '{obs.metadata.get('last_code')}'"
+
+
+@pytest.mark.parametrize(
+    "code,should_have_violation",
+    [
+        ("import os", True),
+        ("eval('1+1')", True),
+        ("open('file.txt')", True),
+        ("print('safe')", False),
+        ("x = 1 + 2", False),
+    ],
+)
+def test_metadata_safety_violations(env, code, should_have_violation):
+    """Test that metadata correctly tracks safety violations."""
+    action = CodeAction(code=code)
+    obs = env.step(action)
+
+    assert "last_code" in obs.metadata
+    assert obs.metadata["last_code"] == code
+
+    if should_have_violation:
+        assert (
+            "safety_violation" in obs.metadata
+        ), f"Code '{code}' should have safety_violation in metadata"
+    else:
+        assert (
+            "safety_violation" not in obs.metadata
+        ), f"Code '{code}' should NOT have safety_violation in metadata"
+
+
+# ============================================================================
+# Consistency and State Tests
+# ============================================================================
+
+
+def test_reward_not_none_for_safe_code(env):
+    """Test that safe code always receives a non-None reward."""
+    action = CodeAction(code="print('Hello')")
+    obs = env.step(action)
+
+    assert obs.reward is not None, "Safe code should receive a reward (not None)"
+    assert obs.exit_code == 0, "Safe code should execute successfully"
+
+
+def test_reward_consistency_across_steps(env):
+    """Test that rewards are computed consistently across multiple steps."""
+    for i in range(5):
+        action = CodeAction(code=f"x = {i}")
+        obs = env.step(action)
+
+        assert obs.reward is not None, f"Step {i}: Reward should not be None"
+        assert obs.reward == pytest.approx(
+            0.1, rel=1e-9
+        ), f"Step {i}: Should get consistent 0.1 reward, got {obs.reward}"
+
+
+def test_reset_preserves_transform_functionality(env):
+    """Test that reset() doesn't break reward computation."""
+    # First episode
+    action1 = CodeAction(code="x = 1")
+    obs1 = env.step(action1)
+    assert obs1.reward == pytest.approx(0.1, rel=1e-9)
+
+    # Reset and start new episode
+    env.reset()
+    action2 = CodeAction(code="y = 2")
+    obs2 = env.step(action2)
+    assert obs2.reward == pytest.approx(
+        0.1, rel=1e-9
+    ), "Reward computation should work after reset"
+
+
+# ============================================================================
+# Fixture Composition Tests
+# ============================================================================
+
+
+def test_using_composed_fixture(env_with_variable):
+    """Test using an environment that builds on base fixture."""
+    action = CodeAction(code="print(test_var)")
+    obs = env_with_variable.step(action)
+
+    assert obs.exit_code == 0
+    assert "42" in obs.stdout
+    assert obs.reward == pytest.approx(0.1, rel=1e-9)
+
+
+@pytest.mark.parametrize(
+    "code,expected_output",
+    [
+        ("print(test_var)", "42"),
+        ("print(test_var * 2)", "84"),
+        ("print(test_var + 8)", "50"),
+    ],
+)
+def test_fixture_with_parametrization(env_with_variable, code, expected_output):
+    """Test combining fixtures with parametrization."""
+    action = CodeAction(code=code)
+    obs = env_with_variable.step(action)
+
+    assert obs.exit_code == 0
+    assert expected_output in obs.stdout
+    assert obs.reward == pytest.approx(0.1, rel=1e-9)
+
+
+# ============================================================================
+# Edge Cases and Special Patterns
+# ============================================================================
+
+
+@pytest.mark.parametrize(
+    "dangerous_pattern",
+    [
+        "import os",
+        "import subprocess",
+        "eval('x')",
+        "exec('x=1')",
+        "__import__('os')",
+        "open('file.txt')",
+    ],
+)
+def test_all_dangerous_patterns_detected(env, dangerous_pattern):
+    """Test that all dangerous patterns are correctly detected and penalized."""
+    action = CodeAction(code=dangerous_pattern)
+    obs = env.step(action)
+
+    # Concise dangerous code gets -0.9 (-1.0 safety + 0.1 concise)
+    assert obs.reward == pytest.approx(
+        -0.9, rel=1e-9
+    ), f"Pattern '{dangerous_pattern}' should get -0.9 reward, got {obs.reward}"
+    assert "safety_violation" in obs.metadata
+
+
+def test_multiline_code_with_mixed_patterns(env):
+    """Test code with both safe and dangerous patterns (dangerous wins)."""
+    code = """
+x = 5
+y = 10
+import os
+z = x + y
+"""
+    action = CodeAction(code=code)
+    obs = env.step(action)
+
+    # Should be flagged as dangerous even with safe code mixed in
+    assert obs.reward < 0, "Code with dangerous import should have negative reward"
+    assert "safety_violation" in obs.metadata