Skip to content

Commit 2ed8b41

Browse files
committed
feat(swe-bench): isolate workspace per instance for parallel execution
Add instance_id to the workspace path to prevent git conflicts between parallel instances. Each instance now uses .workspace/{instance_id}/org/repo instead of sharing .workspace/org/repo. Performance: 10 instances in ~8 min (vs. ~30 min sequential). Results: 7/10 resolved (70%) with Claude Sonnet. Signed-off-by: Jerry Guan <jerryguan777@gmail.com>
1 parent dbc2dd6 commit 2ed8b41

4 files changed

Lines changed: 64 additions & 43 deletions

File tree

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/configs/config_iterative.yml

Lines changed: 23 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -14,32 +14,28 @@
1414
# limitations under the License.
1515

1616
llms:
17-
nim_llm:
18-
_type: nim
19-
model_name: mistralai/mistral-nemotron
20-
temperature: 0.6
21-
max_tokens: 4096
22-
23-
llms:
17+
nim_llm:
18+
_type: nim
19+
model_name: mistralai/mistral-nemotron
20+
temperature: 0.0
21+
max_tokens: 4096
2422
claude_sonnet_llm:
2523
_type: litellm
2624
model_name: anthropic/claude-sonnet-4-5-20250929
2725
temperature: 0.0
28-
api_key: "${ANTHROPIC_API_KEY}" # Set this environment variable before running
29-
30-
# llms:
31-
# openai_llm:
32-
# _type: litellm
33-
# model_name: openai/gpt-5.2
34-
# temperature: 0.0
35-
# api_key: "${OPENAI_API_KEY}" # Set this environment variable before running
26+
api_key: "${ANTHROPIC_API_KEY}"
27+
openai_llm:
28+
_type: litellm
29+
model_name: openai/gpt-5.2
30+
temperature: 0.0
31+
api_key: "${OPENAI_API_KEY}"
3632

3733
workflow:
3834
_type: swe_bench
3935
predictor:
4036
_type: iterative
41-
llm_name: "claude_sonnet_llm"
42-
step_limit: 250
37+
llm_name: "claude_sonnet_llm" # "nim_llm" or "claude_sonnet_llm" or "openai_llm"
38+
step_limit: 100
4339
timeout: 60
4440

4541
functions:
@@ -51,7 +47,7 @@ functions:
5147
eval:
5248
general:
5349
output_dir: .tmp/nat/examples/evaluation_and_profiling/swe_bench/iterative/
54-
max_concurrency: 1
50+
max_concurrency: 5
5551
dataset:
5652
_type: parquet
5753
file_path: hf://datasets/princeton-nlp/SWE-bench_Lite/data/test-00000-of-00001.parquet
@@ -63,15 +59,15 @@ eval:
6359
field:
6460
instance_id:
6561
- sympy__sympy-20590
66-
# - sympy__sympy-21055
67-
# - sympy__sympy-11400
68-
# - astropy__astropy-12907
69-
# - astropy__astropy-6938
70-
# - django__django-15781
71-
# - django__django-11001
72-
# - mwaskom__seaborn-3010
73-
# - pallets__flask-4045
74-
# - psf__requests-1963
62+
- sympy__sympy-21055
63+
- sympy__sympy-11400
64+
- astropy__astropy-12907
65+
- django__django-15781
66+
- astropy__astropy-6938
67+
- django__django-11001
68+
- mwaskom__seaborn-3010
69+
- pallets__flask-4045
70+
- psf__requests-1963
7571

7672
evaluators:
7773
swe_bench:

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/predict_iterative.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -450,12 +450,13 @@ async def predict_fn(self, swebench_input: SWEBenchInput) -> str:
450450
org, repo = repo_name.split('__')
451451
repo_url = f"https://github.com/{org}/{repo}"
452452

453-
# Setup repo
453+
# Setup repo with instance_id for workspace isolation
454454
try:
455455
repo_path_str = await self.git_tool.arun(json.dumps({
456456
"operation": "setup",
457457
"repo_url": repo_url,
458-
"base_commit": swebench_input.base_commit
458+
"base_commit": swebench_input.base_commit,
459+
"instance_id": swebench_input.instance_id # Isolate workspace per instance
459460
}))
460461
repo_path = Path(repo_path_str)
461462
logger.info("Repository setup at %s", repo_path)

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/tools/git_tool.py

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,9 @@
2727
class RepoContext:
2828
"""Context manager for repository operations."""
2929
repo_url: str
30-
base_path: Path
30+
repo_path: Path # Actual path where the repo is cloned
3131
repo: Repo | None = None
3232

33-
def __post_init__(self):
34-
self.repo_name = self.repo_url.split('/')[-1].replace('.git', '')
35-
self.repo_path = self.base_path / self.repo_name
36-
3733

3834
class RepoManager:
3935

@@ -42,9 +38,18 @@ def __init__(self, workspace_dir: str):
4238
self.workspace.mkdir(parents=True, exist_ok=True)
4339
self.active_repos = {}
4440

45-
async def setup_repository(self, repo_url: str, base_commit: str) -> RepoContext:
46-
"""Setup a repository at a specific commit."""
47-
repo_path = get_repo_path(str(self.workspace), repo_url)
41+
async def setup_repository(
42+
self, repo_url: str, base_commit: str, instance_id: str | None = None
43+
) -> RepoContext:
44+
"""Setup a repository at a specific commit.
45+
46+
Args:
47+
repo_url: URL of the repository to clone
48+
base_commit: Commit hash to checkout
49+
instance_id: Optional instance ID for workspace isolation. When provided,
50+
each instance gets its own clean workspace directory.
51+
"""
52+
repo_path = get_repo_path(str(self.workspace), repo_url, instance_id)
4853

4954
if str(repo_path) in self.active_repos:
5055
context = self.active_repos[str(repo_path)]
@@ -54,7 +59,7 @@ async def setup_repository(self, repo_url: str, base_commit: str) -> RepoContext
5459
repo = await clone_repository(repo_url, repo_path)
5560
await checkout_commit(repo, base_commit)
5661

57-
context = RepoContext(repo_url=repo_url, base_path=self.workspace, repo=repo)
62+
context = RepoContext(repo_url=repo_url, repo_path=repo_path, repo=repo)
5863
self.active_repos[str(repo_path)] = context
5964
return context
6065

@@ -68,13 +73,28 @@ async def cleanup(self):
6873
self.active_repos.clear()
6974

7075

71-
def get_repo_path(workspace_dir: str, repo_url: str) -> Path:
72-
"""Generate a unique path for the repository."""
76+
def get_repo_path(workspace_dir: str, repo_url: str, instance_id: str | None = None) -> Path:
77+
"""Generate a unique path for the repository.
78+
79+
Args:
80+
workspace_dir: Base workspace directory
81+
repo_url: URL of the repository
82+
instance_id: Optional instance ID for unique workspace isolation
83+
84+
Returns:
85+
Path to the repository. If instance_id is provided, returns
86+
workspace_dir/instance_id/org/repo for complete isolation.
87+
Otherwise returns workspace_dir/org/repo.
88+
"""
7389
parts = repo_url.rstrip('/').split('/')
7490
repo_name = parts[-1].replace('.git', '')
7591
org_name = parts[-2] # Organization name
76-
77-
# Return: workspace_dir/org/repo
92+
93+
# If instance_id is provided, create isolated workspace per instance
94+
if instance_id:
95+
return Path(workspace_dir) / instance_id / org_name / repo_name
96+
97+
# Default: workspace_dir/org/repo
7898
return Path(workspace_dir) / org_name / repo_name
7999

80100

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/tools/register.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,11 @@ async def git_operations(args_str: str) -> str:
4949
if operation == "setup":
5050
if 'repo_url' not in args or 'base_commit' not in args:
5151
raise ValueError("setup operation requires 'repo_url' and 'base_commit'")
52-
context = await repo_manager.setup_repository(args['repo_url'], args['base_commit'])
52+
# instance_id is optional - when provided, creates isolated workspace per instance
53+
instance_id = args.get('instance_id')
54+
context = await repo_manager.setup_repository(
55+
args['repo_url'], args['base_commit'], instance_id
56+
)
5357
return str(context.repo_path)
5458

5559
if operation == "cleanup":

0 commit comments

Comments
 (0)