Skip to content

Commit 1abbd6e

Browse files
committed
feat(swe-bench): isolate workspace per instance for parallel execution
Add instance_id to the workspace path to prevent git conflicts between parallel instances. Each instance now uses .workspace/{instance_id}/org/repo instead of sharing .workspace/org/repo. Performance: 10 instances in ~8 min (vs. ~30 min sequential). Results: 7/10 resolved (70%) with Claude Sonnet. Signed-off-by: Jerry Guan <jerryguan777@gmail.com>
1 parent dbc2dd6 commit 1abbd6e

4 files changed

Lines changed: 76 additions & 49 deletions

File tree

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/configs/config_iterative.yml

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,32 +14,28 @@
1414
# limitations under the License.
1515

1616
llms:
17-
nim_llm:
18-
_type: nim
19-
model_name: mistralai/mistral-nemotron
20-
temperature: 0.6
21-
max_tokens: 4096
22-
23-
llms:
17+
nim_llm:
18+
_type: nim
19+
model_name: mistralai/mistral-nemotron
20+
temperature: 0.0
21+
max_tokens: 4096
2422
claude_sonnet_llm:
2523
_type: litellm
2624
model_name: anthropic/claude-sonnet-4-5-20250929
2725
temperature: 0.0
28-
api_key: "${ANTHROPIC_API_KEY}" # Set this environment variable before running
29-
30-
# llms:
31-
# openai_llm:
32-
# _type: litellm
33-
# model_name: openai/gpt-5.2
34-
# temperature: 0.0
35-
# api_key: "${OPENAI_API_KEY}" # Set this environment variable before running
26+
api_key: "${ANTHROPIC_API_KEY}"
27+
openai_llm:
28+
_type: litellm
29+
model_name: openai/gpt-5.2
30+
temperature: 0.0
31+
api_key: "${OPENAI_API_KEY}"
3632

3733
workflow:
3834
_type: swe_bench
3935
predictor:
4036
_type: iterative
41-
llm_name: "claude_sonnet_llm"
42-
step_limit: 250
37+
llm_name: "claude_sonnet_llm" # "nim_llm" or "claude_sonnet_llm" or "openai_llm"
38+
step_limit: 100
4339
timeout: 60
4440

4541
functions:
@@ -51,7 +47,7 @@ functions:
5147
eval:
5248
general:
5349
output_dir: .tmp/nat/examples/evaluation_and_profiling/swe_bench/iterative/
54-
max_concurrency: 1
50+
max_concurrency: 5
5551
dataset:
5652
_type: parquet
5753
file_path: hf://datasets/princeton-nlp/SWE-bench_Lite/data/test-00000-of-00001.parquet
@@ -63,15 +59,15 @@ eval:
6359
field:
6460
instance_id:
6561
- sympy__sympy-20590
66-
# - sympy__sympy-21055
67-
# - sympy__sympy-11400
68-
# - astropy__astropy-12907
69-
# - astropy__astropy-6938
70-
# - django__django-15781
71-
# - django__django-11001
72-
# - mwaskom__seaborn-3010
73-
# - pallets__flask-4045
74-
# - psf__requests-1963
62+
- sympy__sympy-21055
63+
- sympy__sympy-11400
64+
- astropy__astropy-12907
65+
- django__django-15781
66+
- astropy__astropy-6938
67+
- django__django-11001
68+
- mwaskom__seaborn-3010
69+
- pallets__flask-4045
70+
- psf__requests-1963
7571

7672
evaluators:
7773
swe_bench:

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/predict_iterative.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -414,7 +414,7 @@ def run_cmd():
414414
except (TimeoutError, subprocess.TimeoutExpired) as e:
415415
# Extract output from exception if available (only subprocess.TimeoutExpired has output attribute)
416416
if isinstance(e, subprocess.TimeoutExpired) and hasattr(e, "output") and e.output:
417-
output = e.output.decode("utf-8", errors="replace")
417+
output = e.output if isinstance(e.output, str) else e.output.decode("utf-8", errors="replace")
418418
else:
419419
output = ""
420420
# Format timeout message using template
@@ -446,16 +446,17 @@ async def predict_fn(self, swebench_input: SWEBenchInput) -> str:
446446
wrapper_type=LLMFrameworkEnum.LANGCHAIN
447447
)
448448

449-
repo_name = swebench_input.instance_id.split('-')[0]
449+
repo_name = swebench_input.instance_id.rsplit('-', 1)[0] # eg. scikit-learn__scikit-learn-14520
450450
org, repo = repo_name.split('__')
451451
repo_url = f"https://github.com/{org}/{repo}"
452452

453-
# Setup repo
453+
# Setup repo with instance_id for workspace isolation
454454
try:
455455
repo_path_str = await self.git_tool.arun(json.dumps({
456456
"operation": "setup",
457457
"repo_url": repo_url,
458-
"base_commit": swebench_input.base_commit
458+
"base_commit": swebench_input.base_commit,
459+
"instance_id": swebench_input.instance_id # Isolate workspace per instance
459460
}))
460461
repo_path = Path(repo_path_str)
461462
logger.info("Repository setup at %s", repo_path)

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/tools/git_tool.py

Lines changed: 41 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -17,6 +17,7 @@
1717
import logging
1818
from dataclasses import dataclass
1919
from pathlib import Path
20+
from urllib.parse import urlparse
2021

2122
from git import Repo
2223

@@ -27,13 +28,9 @@
2728
class RepoContext:
2829
"""Context manager for repository operations."""
2930
repo_url: str
30-
base_path: Path
31+
repo_path: Path # Actual path where the repo is cloned
3132
repo: Repo | None = None
3233

33-
def __post_init__(self):
34-
self.repo_name = self.repo_url.split('/')[-1].replace('.git', '')
35-
self.repo_path = self.base_path / self.repo_name
36-
3734

3835
class RepoManager:
3936

@@ -42,9 +39,18 @@ def __init__(self, workspace_dir: str):
4239
self.workspace.mkdir(parents=True, exist_ok=True)
4340
self.active_repos = {}
4441

45-
async def setup_repository(self, repo_url: str, base_commit: str) -> RepoContext:
46-
"""Setup a repository at a specific commit."""
47-
repo_path = get_repo_path(str(self.workspace), repo_url)
42+
async def setup_repository(
43+
self, repo_url: str, base_commit: str, instance_id: str | None = None
44+
) -> RepoContext:
45+
"""Setup a repository at a specific commit.
46+
47+
Args:
48+
repo_url: URL of the repository to clone
49+
base_commit: Commit hash to checkout
50+
instance_id: Optional instance ID for workspace isolation. When provided,
51+
each instance gets its own clean workspace directory.
52+
"""
53+
repo_path = get_repo_path(str(self.workspace), repo_url, instance_id)
4854

4955
if str(repo_path) in self.active_repos:
5056
context = self.active_repos[str(repo_path)]
@@ -54,7 +60,7 @@ async def setup_repository(self, repo_url: str, base_commit: str) -> RepoContext
5460
repo = await clone_repository(repo_url, repo_path)
5561
await checkout_commit(repo, base_commit)
5662

57-
context = RepoContext(repo_url=repo_url, base_path=self.workspace, repo=repo)
63+
context = RepoContext(repo_url=repo_url, repo_path=repo_path, repo=repo)
5864
self.active_repos[str(repo_path)] = context
5965
return context
6066

@@ -68,13 +74,33 @@ async def cleanup(self):
6874
self.active_repos.clear()
6975

7076

71-
def get_repo_path(workspace_dir: str, repo_url: str) -> Path:
72-
"""Generate a unique path for the repository."""
73-
parts = repo_url.rstrip('/').split('/')
77+
def get_repo_path(workspace_dir: str, repo_url: str, instance_id: str | None = None) -> Path:
78+
"""Generate a unique path for the repository.
79+
80+
Args:
81+
workspace_dir: Base workspace directory
82+
repo_url: URL of the repository
83+
instance_id: Optional instance ID for unique workspace isolation
84+
85+
Returns:
86+
Path to the repository. If instance_id is provided, returns
87+
workspace_dir/instance_id/org/repo for complete isolation.
88+
Otherwise returns workspace_dir/org/repo.
89+
"""
90+
if "://" in repo_url:
91+
path = urlparse(repo_url).path
92+
else:
93+
# SSH form: git@host:org/repo.git
94+
path = repo_url.split(":", 1)[-1]
95+
parts = path.strip("/").split("/")
7496
repo_name = parts[-1].replace('.git', '')
7597
org_name = parts[-2] # Organization name
76-
77-
# Return: workspace_dir/org/repo
98+
99+
# If instance_id is provided, create isolated workspace per instance
100+
if instance_id:
101+
return Path(workspace_dir) / instance_id / org_name / repo_name
102+
103+
# Default: workspace_dir/org/repo
78104
return Path(workspace_dir) / org_name / repo_name
79105

80106

examples/evaluation_and_profiling/swe_bench/src/nat_swe_bench/predictors/predict_iterative/tools/register.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -49,7 +49,11 @@ async def git_operations(args_str: str) -> str:
4949
if operation == "setup":
5050
if 'repo_url' not in args or 'base_commit' not in args:
5151
raise ValueError("setup operation requires 'repo_url' and 'base_commit'")
52-
context = await repo_manager.setup_repository(args['repo_url'], args['base_commit'])
52+
# instance_id is optional - when provided, creates isolated workspace per instance
53+
instance_id = args.get('instance_id')
54+
context = await repo_manager.setup_repository(
55+
args['repo_url'], args['base_commit'], instance_id
56+
)
5357
return str(context.repo_path)
5458

5559
if operation == "cleanup":

0 commit comments

Comments
 (0)