-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsim.py
More file actions
37 lines (29 loc) · 1.33 KB
/
sim.py
File metadata and controls
37 lines (29 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
"""Reward-bounds smoke test.

For every registered task, submit two trivial debug actions (an empty fix
and an irrelevant-but-valid fix) and verify the environment's reward is
STRICTLY between 0 and 1 — i.e. the grader never hands out a degenerate
0.0 or a perfect 1.0 for junk input.

Exits with status 1 if any reward falls outside (0, 1), so this script
can gate CI.
"""
import sys

from server.environment import CodeDebuggerEnvironment
from server.tasks import TASKS
from models import CodeDebugAction

env = CodeDebuggerEnvironment()
# Collected violations: (task_id, test label, offending reward) triples.
out_of_bounds = []

for t in TASKS:
    task_id = t["task_id"]
    print(f"Testing {task_id}...")

    # Test 1: empty fix — should be penalized, but never zeroed to 0.0.
    env.reset(task_id=task_id)
    obs = env.step(CodeDebugAction(bug_line=1, bug_type="logic", fixed_code=""))
    if not (0 < obs.reward < 1.0):
        out_of_bounds.append((task_id, "empty", obs.reward))

    # Test 2: syntactically valid but irrelevant fix — must not score 1.0.
    env.reset(task_id=task_id)
    obs = env.step(CodeDebugAction(bug_line=1, bug_type="logic", fixed_code="def x(): pass"))
    if not (0 < obs.reward < 1.0):
        out_of_bounds.append((task_id, "random", obs.reward))

    # TODO(review): Test 3 ("perfect" action) needs the exact per-task fix,
    # which is not available here. One option is calling grade() directly
    # with a spoofed perfect score. The previous draft's env.reset() call
    # and unused code_snippet lookup were dead code and have been removed.

for err in out_of_bounds:
    print("FAILED:", err)

if not out_of_bounds:
    print("ALL TESTS RETURNED REWARDS STRICTLY BETWEEN 0 AND 1.")
else:
    # Signal failure to CI / shell callers instead of silently exiting 0.
    sys.exit(1)