Skip to content

Commit 2e49922

Browse files
committed
Make RLM and Monty as Sandbox
1 parent bb745b5 commit 2e49922

9 files changed

Lines changed: 191 additions & 7 deletions

File tree

README.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
[![Pre-commit](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/pre-commit.yml)
1515
[![Docs Deploy](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/deploy-docs.yml)
1616
[![Release](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml/badge.svg?branch=main)](https://github.com/SuperagenticAI/rlm-code/actions/workflows/release.yml)
17-
[![Docs](https://img.shields.io/badge/docs-mkdocs-blue.svg)](https://superagenticai.github.io/rlm-code/)
17+
[![Docs](https://img.shields.io/badge/Docs-RLM%20Code-ff7a18.svg?logo=readthedocs&logoColor=white)](https://superagenticai.github.io/rlm-code/)
1818
[![GitHub Stars](https://img.shields.io/github/stars/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/stargazers)
1919
[![GitHub Issues](https://img.shields.io/github/issues/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/issues)
2020
[![GitHub Pull Requests](https://img.shields.io/github/issues-pr/SuperagenticAI/rlm-code.svg)](https://github.com/SuperagenticAI/rlm-code/pulls)
@@ -25,6 +25,18 @@ RLM Code implements the [Recursive Language Models](https://arxiv.org/abs/2502.0
2525

2626
RLM Code wraps this algorithm in an interactive terminal UI with built-in benchmarks, trajectory replay, and observability.
2727

28+
## Documentation
29+
30+
<p align="center">
31+
<a href="https://superagenticai.github.io/rlm-code/">
32+
<img alt="Read the RLM Code Docs" src="https://img.shields.io/badge/Read%20the%20Docs-RLM%20Code-ff7a18?style=for-the-badge&logo=readthedocs&logoColor=white">
33+
</a>
34+
</p>
35+
36+
<p align="center">
37+
<a href="https://superagenticai.github.io/rlm-code/"><strong>Open the full documentation</strong></a>
38+
</p>
39+
2840
## Install
2941

3042
```bash
@@ -304,7 +316,7 @@ rlm_code/
304316
harness/ # Tool-using coding harness (/harness)
305317
```
306318

307-
## Documentation
319+
## Resources
308320

309321
Full docs: https://superagenticai.github.io/rlm-code/
310322

docs/sandbox/index.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ Execution flow:
3232
| Runtime | Isolation | Notes |
3333
|---|---|---|
3434
| `local` | none | Fastest; development only |
35+
| `monty` | in-process sandbox | Sandboxed Rust interpreter via `pydantic-monty`; Python subset |
3536
| `docker` | container | Recommended default for secure local execution |
3637
| `apple-container` | container | macOS-only, behind enable gate |
3738
| `modal` | remote | Requires Modal SDK/auth |
@@ -153,9 +154,12 @@ If configured in `sandbox.docker.extra_args`, runtime creation fails with `Confi
153154

154155
## Monty and Pure RLM Backend
155156

156-
Monty is used as a **pure RLM interpreter backend** (`/sandbox backend monty`), not a general `sandbox.runtime` ID.
157+
Monty can now be used in both places:
157158

158-
Use it when you want secure in-process pure RLM execution without Docker.
159+
- Superbox runtime via `/sandbox use monty`
160+
- Pure RLM interpreter backend via `/sandbox backend monty`
161+
162+
Use Monty when you want secure in-process execution without Docker.
159163

160164
---
161165

rlm_code/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ class SandboxAppleContainerConfig:
102102
class SandboxConfig:
103103
"""Execution sandbox runtime configuration."""
104104

105-
runtime: str = "docker" # local | docker | apple-container | daytona | e2b
105+
runtime: str = "docker" # local | monty | docker | apple-container | daytona | e2b
106106
default_timeout_seconds: int = 30
107107
memory_limit_mb: int = 512
108108
allowed_mount_roots: list[str] = field(
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
"""Monty runtime for sandbox execution."""
2+
3+
from __future__ import annotations
4+
5+
import importlib.util
6+
7+
from ...core.exceptions import ConfigurationError
8+
from .base import RuntimeExecutionRequest, RuntimeExecutionResult
9+
10+
11+
class MontySandboxRuntime:
    """Sandbox runtime that runs code through the Monty interpreter.

    Execution is delegated to ``MontyInterpreter`` (imported lazily from
    ``rlm.monty_interpreter``) and its result is translated into a
    ``RuntimeExecutionResult``.
    """

    name = "monty"

    def __init__(
        self,
        *,
        type_check: bool = False,
        max_allocations: int | None = None,
        max_memory: int | None = None,
        max_output_chars: int = 50_000,
    ) -> None:
        """Store the interpreter options applied to every execution.

        Args:
            type_check: Forwarded to the interpreter as ``type_check``.
            max_allocations: Optional ``max_allocations`` resource limit;
                left out of the limits when ``None``.
            max_memory: Optional ``max_memory`` resource limit; left out of
                the limits when ``None``.
            max_output_chars: Forwarded to the interpreter as
                ``max_output_chars``.
        """
        self.type_check = type_check
        self.max_allocations = max_allocations
        self.max_memory = max_memory
        self.max_output_chars = max_output_chars

    def execute(self, request: RuntimeExecutionRequest) -> RuntimeExecutionResult:
        """Run the code in ``request.code_file`` and map the interpreter result.

        Args:
            request: Execution request; only ``timeout_seconds`` and
                ``code_file`` are read here.

        Returns:
            A ``RuntimeExecutionResult`` whose ``return_code`` is 0 when the
            interpreter reported no error and 1 otherwise, with the
            interpreter output as ``stdout`` and any type-check findings and
            error text joined into ``stderr``.

        Raises:
            ConfigurationError: If importing or constructing the interpreter
                raises ``ImportError``.
        """
        # Only limits that are actually configured are passed on.
        limits: dict[str, float | int] = {}
        if request.timeout_seconds > 0:
            limits["max_duration_secs"] = float(request.timeout_seconds)
        if self.max_allocations is not None:
            limits["max_allocations"] = int(self.max_allocations)
        if self.max_memory is not None:
            limits["max_memory"] = int(self.max_memory)

        try:
            # Imported lazily so this module loads without the interpreter
            # dependency; the constructor stays inside the ``try`` so an
            # ImportError raised there is reported the same way.
            from ...rlm.monty_interpreter import MontyInterpreter

            interp = MontyInterpreter(
                timeout=request.timeout_seconds,
                max_output_chars=self.max_output_chars,
                resource_limits=limits,
                type_check=self.type_check,
            )
        except ImportError as exc:
            raise ConfigurationError(
                "Monty runtime requires pydantic-monty. Install it with: pip install pydantic-monty"
            ) from exc

        code = request.code_file.read_text(encoding="utf-8")
        result = interp.execute(code)

        stderr_parts: list[str] = []
        if result.type_errors:
            stderr_parts.append(f"TypeError:\n{result.type_errors}")
        if result.error:
            stderr_parts.append(result.error)

        # NOTE(review): type-check findings alone do not change the return
        # code; only ``result.error`` does. Confirm this is intended.
        return RuntimeExecutionResult(
            return_code=0 if result.error is None else 1,
            stdout=result.output or "",
            stderr="\n\n".join(stderr_parts),
        )

    @staticmethod
    def check_health() -> tuple[bool, str]:
        """Return (healthy, detail) for Monty runtime availability.

        The probe only looks up the ``pydantic_monty`` module spec; it does
        not import the package.
        """
        try:
            spec = importlib.util.find_spec("pydantic_monty")
        except ValueError:
            # find_spec raises ValueError when the module is already in
            # sys.modules but has no usable __spec__ (for example a stubbed
            # module). It is loaded, so importing it would succeed; report it
            # as available rather than letting the health probe crash.
            return True, "pydantic-monty available"
        if spec is None:
            return False, "pydantic-monty not installed (pip install pydantic-monty)"
        return True, "pydantic-monty available"

rlm_code/sandbox/runtimes/registry.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from .base import SandboxRuntime
1717
from .docker_runtime import DockerSandboxRuntime
1818
from .local_runtime import LocalSandboxRuntime
19+
from .monty_runtime import MontySandboxRuntime
1920

2021
# Cloud runtimes (optional dependencies)
2122
try:
@@ -42,7 +43,7 @@
4243
logger = get_logger(__name__)
4344

4445
# Base runtimes always available
45-
SUPPORTED_RUNTIMES = {"local", "docker", "apple-container"}
46+
SUPPORTED_RUNTIMES = {"local", "monty", "docker", "apple-container"}
4647

4748
# Cloud runtimes (added if dependencies are available)
4849
CLOUD_RUNTIMES = {"modal", "e2b", "daytona"}
@@ -112,6 +113,13 @@ def create_runtime(runtime_name: str, sandbox_config: Any = None) -> SandboxRunt
112113
extra_args=extra_args,
113114
)
114115

116+
if normalized == "monty":
117+
return MontySandboxRuntime(
118+
type_check=bool(getattr(sandbox_config, "monty_type_check", False)),
119+
max_allocations=getattr(sandbox_config, "monty_max_allocations", None),
120+
max_memory=getattr(sandbox_config, "monty_max_memory", None),
121+
)
122+
115123
if normalized == "apple-container":
116124
if sandbox_config and not bool(getattr(sandbox_config, "apple_container_enabled", False)):
117125
raise ConfigurationError(
@@ -184,6 +192,10 @@ def detect_runtime_health() -> dict[str, RuntimeHealth]:
184192
docker_ok, docker_detail = DockerSandboxRuntime.check_health()
185193
results.append(RuntimeHealth(runtime="docker", available=docker_ok, detail=docker_detail))
186194

195+
# Monty runtime
196+
monty_ok, monty_detail = MontySandboxRuntime.check_health()
197+
results.append(RuntimeHealth(runtime="monty", available=monty_ok, detail=monty_detail))
198+
187199
# Apple Container runtime
188200
apple_ok, apple_detail = AppleContainerRuntime.check_health()
189201
results.append(
@@ -301,6 +313,20 @@ def run_runtime_doctor(
301313
)
302314
)
303315

316+
if runtime_name == "monty":
317+
monty_ok, monty_detail = MontySandboxRuntime.check_health()
318+
checks.append(
319+
RuntimeDoctorCheck(
320+
name="monty_runtime",
321+
status="pass" if monty_ok else "fail",
322+
detail=monty_detail,
323+
recommendation=(
324+
None if monty_ok else "Install dependency: pip install pydantic-monty"
325+
),
326+
)
327+
)
328+
return checks
329+
304330
if runtime_name not in {"docker", "apple-container"}:
305331
return checks
306332

tests/rlm/test_phase3.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ def test_cloud_runtime_health_check(self):
306306

307307
# All runtimes should have health entries
308308
assert "local" in health
309+
assert "monty" in health
309310
assert "docker" in health
310311
assert "modal" in health
311312
assert "e2b" in health

tests/rlm/test_phase4.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ class TestConfigIntegration:
340340

341341
def test_config_with_all_runtimes(self):
342342
"""Test configuration with different runtimes."""
343-
runtimes = ["local", "docker", "modal", "e2b", "daytona"]
343+
runtimes = ["local", "monty", "docker", "modal", "e2b", "daytona"]
344344

345345
for runtime in runtimes:
346346
config = RLMConfig.from_dict({"sandbox": {"runtime": runtime}})

tests/test_sandbox_runtimes.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for sandbox runtime registry and execution delegation."""
22

33
from dataclasses import dataclass, field
4+
from pathlib import Path
45
from textwrap import dedent
56

67
import pytest
@@ -9,6 +10,7 @@
910
from rlm_code.core.exceptions import ConfigurationError
1011
from rlm_code.execution.sandbox import ExecutionSandbox
1112
from rlm_code.sandbox.runtimes import (
13+
RuntimeExecutionRequest,
1214
RuntimeExecutionResult,
1315
create_runtime,
1416
detect_runtime_health,
@@ -52,6 +54,11 @@ def test_create_runtime_local():
5254
assert runtime.name == "local"
5355

5456

57+
def test_create_runtime_monty():
    """The registry resolves the ``monty`` runtime name to a Monty runtime."""
    monty_runtime = create_runtime("monty", _SandboxCfg())
    reported_name = monty_runtime.name
    assert reported_name == "monty"
60+
61+
5562
def test_create_runtime_docker_config_applied():
5663
runtime = create_runtime("docker", _SandboxCfg())
5764
assert runtime.name == "docker"
@@ -66,6 +73,45 @@ def test_detect_runtime_health_includes_local():
6673
health = detect_runtime_health()
6774
assert "local" in health
6875
assert health["local"].available is True
76+
assert "monty" in health
77+
78+
79+
def test_monty_runtime_executes_and_maps_result(monkeypatch, tmp_path):
    """A successful interpreter result maps to exit code 0 and empty stderr."""

    class _StubOutcome:
        # Mirrors the attributes the runtime reads from an interpreter result.
        output = "hello from monty\n"
        error = None
        type_errors = None

    class _StubInterpreter:
        # Stand-in for MontyInterpreter; accepts any keyword options.
        def __init__(self, **kwargs):
            self.kwargs = kwargs

        def execute(self, code: str):
            # The runtime must hand over the text read from the code file.
            assert "print" in code
            return _StubOutcome()

    monkeypatch.setattr(
        "rlm_code.rlm.monty_interpreter.MontyInterpreter",
        _StubInterpreter,
    )

    script_path = tmp_path / "generated_code.py"
    script_path.write_text("print('hello from monty')", encoding="utf-8")

    request = RuntimeExecutionRequest(
        code_file=script_path,
        workdir=tmp_path,
        timeout_seconds=5,
        python_executable=Path("/usr/bin/python3"),
        env={},
    )
    outcome = create_runtime("monty", _SandboxCfg()).execute(request)

    assert outcome.return_code == 0
    assert outcome.stdout == "hello from monty\n"
    assert outcome.stderr == ""
69115

70116

71117
def test_execution_sandbox_uses_runtime_override(monkeypatch):

tests/test_slash_sandbox_command.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def test_sandbox_use_updates_config_and_engine(monkeypatch):
5858

5959
health = {
6060
"local": RuntimeHealth(runtime="local", available=True, detail="ok"),
61+
"monty": RuntimeHealth(runtime="monty", available=True, detail="ok"),
6162
"docker": RuntimeHealth(runtime="docker", available=True, detail="ok"),
6263
"apple-container": RuntimeHealth(
6364
runtime="apple-container", available=False, detail="missing"
@@ -77,6 +78,7 @@ def test_sandbox_status_runs_without_error(monkeypatch):
7778

7879
health = {
7980
"local": RuntimeHealth(runtime="local", available=True, detail="ok"),
81+
"monty": RuntimeHealth(runtime="monty", available=False, detail="missing dependency"),
8082
"docker": RuntimeHealth(runtime="docker", available=False, detail="down"),
8183
"apple-container": RuntimeHealth(
8284
runtime="apple-container", available=False, detail="missing"
@@ -204,6 +206,7 @@ def test_sandbox_manual_override_marks_profile_custom(monkeypatch):
204206

205207
health = {
206208
"local": RuntimeHealth(runtime="local", available=True, detail="ok"),
209+
"monty": RuntimeHealth(runtime="monty", available=True, detail="ok"),
207210
"docker": RuntimeHealth(runtime="docker", available=True, detail="ok"),
208211
"apple-container": RuntimeHealth(
209212
runtime="apple-container", available=False, detail="missing"
@@ -214,3 +217,23 @@ def test_sandbox_manual_override_marks_profile_custom(monkeypatch):
214217

215218
assert handler.config_manager.config.sandbox.superbox_profile == "custom"
216219
assert handler.config_manager.saved is True
220+
221+
222+
def test_sandbox_use_monty_updates_config_and_engine(monkeypatch):
    """``/sandbox use monty`` persists the runtime and switches the engine."""
    handler = _build_handler()

    # (runtime, available, detail) rows for the stubbed health report.
    rows = [
        ("local", True, "ok"),
        ("monty", True, "ok"),
        ("docker", True, "ok"),
        ("apple-container", False, "missing"),
    ]
    health = {
        runtime: RuntimeHealth(runtime=runtime, available=available, detail=detail)
        for runtime, available, detail in rows
    }
    monkeypatch.setattr("rlm_code.commands.slash_commands.detect_runtime_health", lambda: health)

    handler.cmd_sandbox(["use", "monty"])

    config_manager = handler.config_manager
    assert config_manager.config.sandbox.runtime == "monty"
    assert config_manager.saved is True
    assert handler.execution_engine.get_runtime_name() == "monty"

0 commit comments

Comments
 (0)