Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
370 changes: 370 additions & 0 deletions data/eval/handcrafted/add_import_after_use.md

Large diffs are not rendered by default.

162 changes: 92 additions & 70 deletions data/eval/handcrafted/add_import_easy.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,98 +15,120 @@ Traceback (most recent call last):
NameError: name 'json' is not defined. Did you forget to import 'json'?
</stdout>


# Assistant <EVAL>
```bash
cat -n src/input_pipeline/validation_to_testcases.py
```

<assertions>
Make sure that:
- the complete contents of the file are catted using the cat command with the -n flag
- we do not use grep or any other command to filter the output
- nothing else is being modified or run at all
</assertions>

# User
<stdout>
1 import tyro
2 import os
3 from dataclasses import dataclass
4
5
6 @dataclass
7 class Args:
8 input_file: str = "data/eval/val/validation.jsonl"
9 output_file: str = "data/eval/val/validation_testcases.jsonl"
10 task_name: str = "validation_set"
11
12
13 def convert_to_incremental_jsonl(line, output_file, task_name):
14 data = json.loads(line)
15
16 system_prompt = data.get("system_prompt", "")
17 conversations = data.get("conversations", [])
18
19 output_lines = []
20 context = []
21
22 for i, conv in enumerate(conversations):
23 # Convert "Assistant"/"User" to "assistant"/"user"
24 role = conv["from"].lower()
25 content = conv["value"]
26
27 # Add current message to context
28 context.append({"role": role, "content": content})
29
30 if i > 0 and role == "assistant":
31 task_entry = {
32 "system_prompt": system_prompt,
33 "task_id": f"{task_name}/{i-1}",
34 "context": context[:-1].copy(),
35 "expected_final_response": content,
36 }
37 output_lines.append(task_entry)
38
39 with open(output_file, "a") as f:
40 for line in output_lines:
41 f.write(json.dumps(line) + "\n")
42
43 print(f"Converted {len(output_lines)} entries to {output_file}")
44
45
46 if __name__ == "__main__":
47 args = tyro.cli(Args)
48
49 if os.path.exists(args.output_file):
50 os.remove(args.output_file)
51
52 # iterate over jsonl file and convert to incremental jsonl
53 conversation_id = 0
54 with open(args.input_file, "r") as f:
55 for line in f:
56 task_name = f"conversation_{conversation_id}/{args.task_name}"
57 convert_to_incremental_jsonl(line, args.output_file, task_name)
58 conversation_id += 1
1 import tyro
2 import os
3 from dataclasses import dataclass
4
5
6 @dataclass
7 class Args:
8 input_file: str = "data/eval/val/validation.jsonl"
9 output_file: str = "data/eval/val/validation_testcases.jsonl"
10 task_name: str = "validation_set"
11
12
13 def convert_to_incremental_jsonl(line, output_file, task_name):
14 data = json.loads(line)
15
16 system_prompt = data.get("system_prompt", "")
17 conversations = data.get("conversations", [])
18
19 output_lines = []
20 context = []
21
22 for i, conv in enumerate(conversations):
23 # Convert "Assistant"/"User" to "assistant"/"user"
24 role = conv["from"].lower()
25 content = conv["value"]
26
27 # Add current message to context
28 context.append({"role": role, "content": content})
29
30 if i > 0 and role == "assistant":
31 task_entry = {
32 "system_prompt": system_prompt,
33 "task_id": f"{task_name}/{i-1}",
34 "context": context[:-1].copy(),
35 "expected_final_response": content,
36 }
37 output_lines.append(task_entry)
38
39 with open(output_file, "a") as f:
40 for line in output_lines:
41 f.write(json.dumps(line) + "\n")
42
43 print(f"Converted {len(output_lines)} entries to {output_file}")
44
45
46 if __name__ == "__main__":
47 args = tyro.cli(Args)
48
49 if os.path.exists(args.output_file):
50 os.remove(args.output_file)
51
52 # iterate over jsonl file and convert to incremental jsonl
53 conversation_id = 0
54 with open(args.input_file, "r") as f:
55 for line in f:
56 task_name = f"conversation_{conversation_id}/{args.task_name}"
57 convert_to_incremental_jsonl(line, args.output_file, task_name)
58 conversation_id += 1
</stdout>

# Assistant <EVAL>
```bash
sed -i '1i\import json' src/input_pipeline/validation_to_testcases.py && cat -n src/input_pipeline/validation_to_testcases.py | sed -n '1,10p'
sed -i '1i\
import json' src/input_pipeline/validation_to_testcases.py && cat -n src/input_pipeline/validation_to_testcases.py | sed -n '1,11p'
```

<assertions>
Make sure that:
- json is being imported
- it can be imported either at the top, in between or at the end of the other import statements
- the part that was edited is being catted with the correct viewport (10 lines above and below the edited part)
</assertions>

# User
<stdout>
1 import json
2 import tyro
3 import os
4 from dataclasses import dataclass
5
6
7 @dataclass
8 class Args:
9 input_file: str = "data/eval/val/validation.jsonl"
10 output_file: str = "data/eval/val/validation_testcases.jsonl"
1 import json
2 import tyro
3 import os
4 from dataclasses import dataclass
5
6
7 @dataclass
8 class Args:
9 input_file: str = "data/eval/val/validation.jsonl"
10 output_file: str = "data/eval/val/validation_testcases.jsonl"
11 task_name: str = "validation_set"
</stdout>

# Assistant <EVAL>
```bash
python src/input_pipeline/validation_to_testcases.py --input_file data/temp/validation.jsonl --output_file data/temp/validation_testcases.jsonl
```

<assertions>
Make sure that:
- we are running the correct python script
- we use the correct input and output files
- nothing else is being modified or run at all
</assertions>

# User
<stdout>
Converted 7 entries to data/temp/validation_testcases.jsonl
Expand Down
162 changes: 92 additions & 70 deletions data/eval/handcrafted/add_import_hard.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,93 +17,115 @@ NameError: name 'tyro' is not defined
cat -n src/input_pipeline/validation_to_testcases.py
```

<assertions>
Make sure that:
- the complete contents of the file are catted using the cat command with the -n flag
- do not use grep or any other command to filter the output
- nothing is being modified at all
</assertions>

# User
<stdout>
1 import json
2 import os
3 from dataclasses import dataclass
4
5
6 @dataclass
7 class Args:
8 input_file: str = "data/eval/val/validation.jsonl"
9 output_file: str = "data/eval/val/validation_testcases.jsonl"
10 task_name: str = "validation_set"
11
12
13 def convert_to_incremental_jsonl(line, output_file, task_name):
14 data = json.loads(line)
15
16 system_prompt = data.get("system_prompt", "")
17 conversations = data.get("conversations", [])
18
19 output_lines = []
20 context = []
21
22 for i, conv in enumerate(conversations):
23 # Convert "Assistant"/"User" to "assistant"/"user"
24 role = conv["from"].lower()
25 content = conv["value"]
26
27 # Add current message to context
28 context.append({"role": role, "content": content})
29
30 if i > 0 and role == "assistant":
31 task_entry = {
32 "system_prompt": system_prompt,
33 "task_id": f"{task_name}/{i-1}",
34 "context": context[:-1].copy(),
35 "expected_final_response": content,
36 }
37 output_lines.append(task_entry)
38
39 with open(output_file, "a") as f:
40 for line in output_lines:
41 f.write(json.dumps(line) + "\n")
42
43 print(f"Converted {len(output_lines)} entries to {output_file}")
44
45
46 if __name__ == "__main__":
47 args = tyro.cli(Args)
48
49 if os.path.exists(args.output_file):
50 os.remove(args.output_file)
51
52 # iterate over jsonl file and convert to incremental jsonl
53 conversation_id = 0
54 with open(args.input_file, "r") as f:
55 for line in f:
56 task_name = f"conversation_{conversation_id}/{args.task_name}"
57 convert_to_incremental_jsonl(line, args.output_file, task_name)
58 conversation_id += 1
1 import json
2 import os
3 from dataclasses import dataclass
4
5
6 @dataclass
7 class Args:
8 input_file: str = "data/eval/val/validation.jsonl"
9 output_file: str = "data/eval/val/validation_testcases.jsonl"
10 task_name: str = "validation_set"
11
12
13 def convert_to_incremental_jsonl(line, output_file, task_name):
14 data = json.loads(line)
15
16 system_prompt = data.get("system_prompt", "")
17 conversations = data.get("conversations", [])
18
19 output_lines = []
20 context = []
21
22 for i, conv in enumerate(conversations):
23 # Convert "Assistant"/"User" to "assistant"/"user"
24 role = conv["from"].lower()
25 content = conv["value"]
26
27 # Add current message to context
28 context.append({"role": role, "content": content})
29
30 if i > 0 and role == "assistant":
31 task_entry = {
32 "system_prompt": system_prompt,
33 "task_id": f"{task_name}/{i-1}",
34 "context": context[:-1].copy(),
35 "expected_final_response": content,
36 }
37 output_lines.append(task_entry)
38
39 with open(output_file, "a") as f:
40 for line in output_lines:
41 f.write(json.dumps(line) + "\n")
42
43 print(f"Converted {len(output_lines)} entries to {output_file}")
44
45
46 if __name__ == "__main__":
47 args = tyro.cli(Args)
48
49 if os.path.exists(args.output_file):
50 os.remove(args.output_file)
51
52 # iterate over jsonl file and convert to incremental jsonl
53 conversation_id = 0
54 with open(args.input_file, "r") as f:
55 for line in f:
56 task_name = f"conversation_{conversation_id}/{args.task_name}"
57 convert_to_incremental_jsonl(line, args.output_file, task_name)
58 conversation_id += 1
</stdout>

# Assistant <EVAL>
```bash
sed -i '1i\import tyro' src/input_pipeline/validation_to_testcases.py && cat -n src/input_pipeline/validation_to_testcases.py | sed -n '1,10p'
sed -i '1i\
import tyro' src/input_pipeline/validation_to_testcases.py && cat -n src/input_pipeline/validation_to_testcases.py | sed -n '1,11p'
```

<assertions>
Make sure that:
- tyro is being imported
- it can be imported either at the top, in between or at the end of the other import statements
- the part that was edited is being catted with the correct viewport (10 lines above and below the edited part)
</assertions>

# User
<stdout>
1 import tyro
2 import json
3 import os
4 from dataclasses import dataclass
5
6
7 @dataclass
8 class Args:
9 input_file: str = "data/eval/val/validation.jsonl"
10 output_file: str = "data/eval/val/validation_testcases.jsonl"
1 import tyro
2 import json
3 import os
4 from dataclasses import dataclass
5
6
7 @dataclass
8 class Args:
9 input_file: str = "data/eval/val/validation.jsonl"
10 output_file: str = "data/eval/val/validation_testcases.jsonl"
11 task_name: str = "validation_set"
</stdout>


# Assistant <EVAL>
```bash
python src/input_pipeline/validation_to_testcases.py --input_file data/temp/validation.jsonl --output_file data/temp/validation_testcases.jsonl
```

<assertions>
Make sure that:
- we are running the correct script
- we use the correct input and output files
- nothing else is being modified at all
</assertions>

# User
<stdout>
Converted 7 entries to data/temp/validation_testcases.jsonl
Expand Down
Loading