Make RLM and Monty as Sandbox

Shashikant86 · Shashikant86 · commit 2e4992278753 · 2026-02-16T01:08:44.000Z
diff --git a/README.md b/README.md
@@ -14,7 +14,7 @@
 [![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
 [![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
 [![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
-[![Docs](https://img.shields.io/badge/docs-mkdocs-blue.svg)](https://superagenticai.github.io/rlm-code/)
+[![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
 [![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
 [![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
 [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
@@ -25,6 +25,18 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
 
 RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
 
+## Documentation
+
+<p align="center">
+  <a href="https://superagenticai.github.io/rlm-code/">
+    <img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
+  </a>
+</p>
+
+<p align="center">
+  <a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
+</p>
+
 ## Install
 
 ```bash
@@ -304,7 +316,7 @@ rlm_code/
   harness/          # Tool-using coding harness (/harness)
 ```
 
-## Documentation
+## Resources
 
 Full docs: https://superagenticai.github.io/rlm-code/
 
diff --git a/docs/sandbox/index.md b/docs/sandbox/index.md
@@ -32,6 +32,7 @@ Execution flow:
 | Runtime | Isolation | Notes |
 |---|---|---|
 | `local` | none | Fastest; development only |
+| `monty` | in-process sandbox | Sandboxed Python interpreter written in Rust (`pydantic-monty`); supports a Python subset |
 | `docker` | container | Recommended default for secure local execution |
 | `apple-container` | container | macOS-only, behind enable gate |
 | `modal` | remote | Requires Modal SDK/auth |
@@ -153,9 +154,12 @@ If configured in `sandbox.docker.extra_args`, runtime creation fails with `Confi
 
 ## Monty and Pure RLM Backend
 
-Monty is used as a **pure RLM interpreter backend** (`/sandbox backend monty`), not a general `sandbox.runtime` ID.
+Monty can now be used in two ways:
 
-Use it when you want secure in-process pure RLM execution without Docker.
+- Superbox runtime via `/sandbox use monty`
+- Pure RLM interpreter backend via `/sandbox backend monty`
+
+Use Monty when you want secure in-process execution without Docker.
 
 ---
 
diff --git a/rlm_code/core/config.py b/rlm_code/core/config.py
@@ -102,7 +102,7 @@ class SandboxAppleContainerConfig:
 class SandboxConfig:
     """Execution sandbox runtime configuration."""
 
-    runtime: str = "docker"  # local | docker | apple-container | daytona | e2b
+    runtime: str = "docker"  # local | monty | docker | apple-container | modal | e2b | daytona
     default_timeout_seconds: int = 30
     memory_limit_mb: int = 512
     allowed_mount_roots: list[str] = field(
diff --git a/rlm_code/sandbox/runtimes/monty_runtime.py b/rlm_code/sandbox/runtimes/monty_runtime.py
@@ -0,0 +1,72 @@
+"""Monty runtime for sandbox execution."""
+
+from __future__ import annotations
+
+import importlib.util
+
+from ...core.exceptions import ConfigurationError
+from .base import RuntimeExecutionRequest, RuntimeExecutionResult
+
+
+class MontySandboxRuntime:
+    """Executes code using the Monty Rust-based sandboxed Python interpreter."""
+
+    name = "monty"
+
+    def __init__(
+        self,
+        *,
+        type_check: bool = False,
+        max_allocations: int | None = None,
+        max_memory: int | None = None,
+        max_output_chars: int = 50_000,
+    ):
+        self.type_check = type_check
+        self.max_allocations = max_allocations
+        self.max_memory = max_memory
+        self.max_output_chars = max_output_chars
+
+    def execute(self, request: RuntimeExecutionRequest) -> RuntimeExecutionResult:
+        limits: dict[str, float | int] = {}
+        if request.timeout_seconds > 0:
+            limits["max_duration_secs"] = float(request.timeout_seconds)
+        if self.max_allocations is not None:
+            limits["max_allocations"] = int(self.max_allocations)
+        if self.max_memory is not None:
+            limits["max_memory"] = int(self.max_memory)
+
+        try:
+            from ...rlm.monty_interpreter import MontyInterpreter
+
+            interp = MontyInterpreter(
+                timeout=request.timeout_seconds,
+                max_output_chars=self.max_output_chars,
+                resource_limits=limits,
+                type_check=self.type_check,
+            )
+        except ImportError as exc:
+            raise ConfigurationError(
+                "Monty runtime requires pydantic-monty. Install it with: pip install pydantic-monty"
+            ) from exc
+
+        code = request.code_file.read_text(encoding="utf-8")
+        result = interp.execute(code)
+
+        stderr_parts: list[str] = []
+        if result.type_errors:
+            stderr_parts.append(f"TypeError:\n{result.type_errors}")
+        if result.error:
+            stderr_parts.append(result.error)
+
+        return RuntimeExecutionResult(
+            return_code=0 if result.error is None else 1,
+            stdout=result.output or "",
+            stderr="\n\n".join(stderr_parts),
+        )
+
+    @staticmethod
+    def check_health() -> tuple[bool, str]:
+        """Return (healthy, detail) for Monty runtime availability."""
+        if importlib.util.find_spec("pydantic_monty") is None:
+            return False, "pydantic-monty not installed (pip install pydantic-monty)"
+        return True, "pydantic-monty available"
diff --git a/rlm_code/sandbox/runtimes/registry.py b/rlm_code/sandbox/runtimes/registry.py
@@ -16,6 +16,7 @@
 from .base import SandboxRuntime
 from .docker_runtime import DockerSandboxRuntime
 from .local_runtime import LocalSandboxRuntime
+from .monty_runtime import MontySandboxRuntime
 
 # Cloud runtimes (optional dependencies)
 try:
@@ -42,7 +43,7 @@
 logger = get_logger(__name__)
 
 # Base runtimes always available
-SUPPORTED_RUNTIMES = {"local", "docker", "apple-container"}
+SUPPORTED_RUNTIMES = {"local", "monty", "docker", "apple-container"}
 
 # Cloud runtimes (added if dependencies are available)
 CLOUD_RUNTIMES = {"modal", "e2b", "daytona"}
@@ -112,6 +113,13 @@ def create_runtime(runtime_name: str, sandbox_config: Any = None) -> SandboxRunt
             extra_args=extra_args,
         )
 
+    if normalized == "monty":
+        return MontySandboxRuntime(
+            type_check=bool(getattr(sandbox_config, "monty_type_check", False)),
+            max_allocations=getattr(sandbox_config, "monty_max_allocations", None),
+            max_memory=getattr(sandbox_config, "monty_max_memory", None),
+        )
+
     if normalized == "apple-container":
         if sandbox_config and not bool(getattr(sandbox_config, "apple_container_enabled", False)):
             raise ConfigurationError(
@@ -184,6 +192,10 @@ def detect_runtime_health() -> dict[str, RuntimeHealth]:
     docker_ok, docker_detail = DockerSandboxRuntime.check_health()
     results.append(RuntimeHealth(runtime="docker", available=docker_ok, detail=docker_detail))
 
+    # Monty runtime
+    monty_ok, monty_detail = MontySandboxRuntime.check_health()
+    results.append(RuntimeHealth(runtime="monty", available=monty_ok, detail=monty_detail))
+
     # Apple Container runtime
     apple_ok, apple_detail = AppleContainerRuntime.check_health()
     results.append(
@@ -301,6 +313,20 @@ def run_runtime_doctor(
             )
         )
 
+    if runtime_name == "monty":
+        monty_ok, monty_detail = MontySandboxRuntime.check_health()
+        checks.append(
+            RuntimeDoctorCheck(
+                name="monty_runtime",
+                status="pass" if monty_ok else "fail",
+                detail=monty_detail,
+                recommendation=(
+                    None if monty_ok else "Install dependency: pip install pydantic-monty"
+                ),
+            )
+        )
+        return checks
+
     if runtime_name not in {"docker", "apple-container"}:
         return checks
 
diff --git a/tests/rlm/test_phase3.py b/tests/rlm/test_phase3.py
@@ -306,6 +306,7 @@ def test_cloud_runtime_health_check(self):
 
         # All runtimes should have health entries
         assert "local" in health
+        assert "monty" in health
         assert "docker" in health
         assert "modal" in health
         assert "e2b" in health
diff --git a/tests/rlm/test_phase4.py b/tests/rlm/test_phase4.py
@@ -340,7 +340,7 @@ class TestConfigIntegration:
 
     def test_config_with_all_runtimes(self):
         """Test configuration with different runtimes."""
-        runtimes = ["local", "docker", "modal", "e2b", "daytona"]
+        runtimes = ["local", "monty", "docker", "modal", "e2b", "daytona"]
 
         for runtime in runtimes:
             config = RLMConfig.from_dict({"sandbox": {"runtime": runtime}})
diff --git a/tests/test_sandbox_runtimes.py b/tests/test_sandbox_runtimes.py
@@ -1,6 +1,7 @@
 """Tests for sandbox runtime registry and execution delegation."""
 
 from dataclasses import dataclass, field
+from pathlib import Path
 from textwrap import dedent
 
 import pytest
@@ -9,6 +10,7 @@
 from rlm_code.core.exceptions import ConfigurationError
 from rlm_code.execution.sandbox import ExecutionSandbox
 from rlm_code.sandbox.runtimes import (
+    RuntimeExecutionRequest,
     RuntimeExecutionResult,
     create_runtime,
     detect_runtime_health,
@@ -52,6 +54,11 @@ def test_create_runtime_local():
     assert runtime.name == "local"
 
 
+def test_create_runtime_monty():
+    runtime = create_runtime("monty", _SandboxCfg())
+    assert runtime.name == "monty"
+
+
 def test_create_runtime_docker_config_applied():
     runtime = create_runtime("docker", _SandboxCfg())
     assert runtime.name == "docker"
@@ -66,6 +73,45 @@ def test_detect_runtime_health_includes_local():
     health = detect_runtime_health()
     assert "local" in health
     assert health["local"].available is True
+    assert "monty" in health
+
+
+def test_monty_runtime_executes_and_maps_result(monkeypatch, tmp_path):
+    class _FakeResult:
+        output = "hello from monty\n"
+        error = None
+        type_errors = None
+
+    class _FakeMontyInterpreter:
+        def __init__(self, **kwargs):
+            self.kwargs = kwargs
+
+        def execute(self, code: str):
+            assert "print" in code
+            return _FakeResult()
+
+    monkeypatch.setattr(
+        "rlm_code.rlm.monty_interpreter.MontyInterpreter",
+        _FakeMontyInterpreter,
+    )
+
+    code_file = tmp_path / "generated_code.py"
+    code_file.write_text("print('hello from monty')", encoding="utf-8")
+
+    runtime = create_runtime("monty", _SandboxCfg())
+    result = runtime.execute(
+        RuntimeExecutionRequest(
+            code_file=code_file,
+            workdir=tmp_path,
+            timeout_seconds=5,
+            python_executable=Path("/usr/bin/python3"),
+            env={},
+        )
+    )
+
+    assert result.return_code == 0
+    assert result.stdout == "hello from monty\n"
+    assert result.stderr == ""
 
 
 def test_execution_sandbox_uses_runtime_override(monkeypatch):
diff --git a/tests/test_slash_sandbox_command.py b/tests/test_slash_sandbox_command.py
@@ -58,6 +58,7 @@ def test_sandbox_use_updates_config_and_engine(monkeypatch):
 
     health = {
         "local": RuntimeHealth(runtime="local", available=True, detail="ok"),
+        "monty": RuntimeHealth(runtime="monty", available=True, detail="ok"),
         "docker": RuntimeHealth(runtime="docker", available=True, detail="ok"),
         "apple-container": RuntimeHealth(
             runtime="apple-container", available=False, detail="missing"
@@ -77,6 +78,7 @@ def test_sandbox_status_runs_without_error(monkeypatch):
 
     health = {
         "local": RuntimeHealth(runtime="local", available=True, detail="ok"),
+        "monty": RuntimeHealth(runtime="monty", available=False, detail="missing dependency"),
         "docker": RuntimeHealth(runtime="docker", available=False, detail="down"),
         "apple-container": RuntimeHealth(
             runtime="apple-container", available=False, detail="missing"
@@ -204,6 +206,7 @@ def test_sandbox_manual_override_marks_profile_custom(monkeypatch):
 
     health = {
         "local": RuntimeHealth(runtime="local", available=True, detail="ok"),
+        "monty": RuntimeHealth(runtime="monty", available=True, detail="ok"),
         "docker": RuntimeHealth(runtime="docker", available=True, detail="ok"),
         "apple-container": RuntimeHealth(
             runtime="apple-container", available=False, detail="missing"
@@ -214,3 +217,23 @@ def test_sandbox_manual_override_marks_profile_custom(monkeypatch):
 
     assert handler.config_manager.config.sandbox.superbox_profile == "custom"
     assert handler.config_manager.saved is True
+
+
+def test_sandbox_use_monty_updates_config_and_engine(monkeypatch):
+    handler = _build_handler()
+
+    health = {
+        "local": RuntimeHealth(runtime="local", available=True, detail="ok"),
+        "monty": RuntimeHealth(runtime="monty", available=True, detail="ok"),
+        "docker": RuntimeHealth(runtime="docker", available=True, detail="ok"),
+        "apple-container": RuntimeHealth(
+            runtime="apple-container", available=False, detail="missing"
+        ),
+    }
+    monkeypatch.setattr("rlm_code.commands.slash_commands.detect_runtime_health", lambda: health)
+
+    handler.cmd_sandbox(["use", "monty"])
+
+    assert handler.config_manager.config.sandbox.runtime == "monty"
+    assert handler.config_manager.saved is True
+    assert handler.execution_engine.get_runtime_name() == "monty"