Skip to content

Commit 4eb4e3f

Browse files
authored
fix(datagen): remove remote_api from rule2code scripts (#20)
* fix: remove remote_api
* fix: remove remote_api
1 parent d7b033a commit 4eb4e3f

File tree

2 files changed

+13
-27
lines changed

2 files changed

+13
-27
lines changed

datagen/rule2code/cwe2code.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -219,17 +219,17 @@ def generate_followup_prompt():
219219
"""
220220

221221

222-
def _create_client(remote_api=False):
223-
if remote_api:
224-
return OpenAI(base_url="https://api.deepseek.com"), "deepseek-reasoner"
225-
# Otherwise sglang
226-
return OpenAI(api_key="none", base_url="http://0.0.0.0:30000/v1"), "default"
222+
def _create_client():
223+
return (
224+
OpenAI(api_key="none", base_url="http://localhost:30000/v1"),
225+
"default",
226+
)
227227

228228

229-
def datagen_for_one_cwe(cwe_id, markdown, depth, remote_api=False):
229+
def datagen_for_one_cwe(cwe_id, markdown, depth):
230230
assert depth > 0
231231

232-
client, model = _create_client(remote_api=remote_api)
232+
client, model = _create_client()
233233
common_args = {"model": model, "temperature": 0.6}
234234

235235
rprint(f"[bold yellow]Processing: CWE ID: {cwe_id}[/bold yellow]")
@@ -277,10 +277,10 @@ def main(
277277
parallel=256,
278278
output_path="outputs/rule2code/cwe2code.jsonl",
279279
depth=1,
280-
remote_api=False,
281280
):
281+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
282+
282283
collection = create_cwe_information()
283-
# each line: cwe_id, conversation
284284

285285
finished = set()
286286
if os.path.exists(output_path):
@@ -294,9 +294,7 @@ def main(
294294
if cwe_id in finished:
295295
continue
296296
futures.append(
297-
executor.submit(
298-
datagen_for_one_cwe, cwe_id, markdown, depth, remote_api
299-
)
297+
executor.submit(datagen_for_one_cwe, cwe_id, markdown, depth)
300298
)
301299

302300
for future in tqdm(as_completed(futures), total=len(futures)):

datagen/rule2code/guru2code.py

Lines changed: 3 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -137,10 +137,7 @@ def generate_followup_prompt(seed_data):
137137
--- END OF EXAMPLE ---"""
138138

139139

140-
def _create_client(remote_api=False):
141-
if remote_api:
142-
load_dotenv()
143-
return None, "bedrock/converse/us.deepseek.r1-v1:0"
140+
def _create_client():
144141
return (
145142
OpenAI(api_key="none", base_url="http://localhost:30000/v1"),
146143
"default",
@@ -152,9 +149,8 @@ def datagen_for_one_seed(
152149
output_file,
153150
finished_pairs,
154151
depth=1,
155-
remote_api=False,
156152
):
157-
client, model = _create_client(remote_api=remote_api)
153+
client, model = _create_client()
158154
common_args = {
159155
"model": model,
160156
"temperature": 0.8,
@@ -173,13 +169,7 @@ def datagen_for_one_seed(
173169
]
174170

175171
for i in range(depth):
176-
if remote_api:
177-
response = batch_completion(
178-
model=model,
179-
messages=[messages],
180-
)[0]
181-
else:
182-
response = client.chat.completions.create(messages=messages, **common_args)
172+
response = client.chat.completions.create(messages=messages, **common_args)
183173

184174
if response.choices[0].finish_reason == "length":
185175
break
@@ -213,7 +203,6 @@ def main(
213203
parallel=256,
214204
output_path="outputs/rule2code/guru2code.jsonl",
215205
depth=1,
216-
remote_api=False,
217206
):
218207
os.makedirs(os.path.dirname(output_path), exist_ok=True)
219208

@@ -239,7 +228,6 @@ def main(
239228
output_path,
240229
finished_pairs,
241230
depth,
242-
remote_api,
243231
)
244232
)
245233

0 commit comments

Comments
 (0)