Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# 🔮 PurpCode: Reasoning for Safer Code Generation

This repository includes the training and evaluation infrastructure for PurpCode. For other resources, please check out:

* [📝 Paper](https://arxiv.org/abs/2507.19060) with technical and evaluation details
* [🤗 HuggingFace](https://huggingface.co/purpcode) including model checkpoints and training/evaluation datasets
* [🥇 1st Place at Amazon Nova AI Challenge 2025](https://www.amazon.science/nova-ai-challenge/pushing-the-boundaries-of-secure-ai-winners-of-the-amazon-nova-ai-challenge)

## Overview

PurpCode is an alignment method and a fully open-source recipe (data, model, and code) for eliciting **cybersafe reasoning** capabilities of coding models, including secure code generation and defending against malicious cyber events.
PurpCode includes two alignment stages:

1. **[Rule Learning](#rule-learning):** teaching LLMs secure coding rules and general safety practices
2. **[Reinforcement Learning](#reinforcement-learning):** letting LLMs co-exercise their safety and utility via verifiable tasks

We also curate comprehensive safety data via internal red teaming and use various evaluators covering cybersafety, utility, and overrefusal.

## Rule Learning

TBD

## Reinforcement Learning

TBD

## Evaluation

```bash
export PYTHONPATH=$PYTHONPATH:$(pwd)

python eval/main.py --task "purpcode/CyberSecEval-SCG" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/CodeLMSec" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/CWEval" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/CyberSecEval-MITRE" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/CyberSecEval-FRR" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/XSCode" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/XSTest" --model purpcode/purpcode-14b-rl
python eval/main.py --task "purpcode/PHTest" --model purpcode/purpcode-14b-rl
```

Notes:
* Pass `--oracle` to select the evaluation oracle explicitly; by default it is guessed from the dataset name.

## References

```bibtex
@article{purpcode,
title = {PurpCode: Reasoning for Safer Code Generation},
author = {Liu, Jiawei and Diwan, Nirav and Wang, Zhe and Zhai, Haoyu and Zhou, Xiaona and Nguyen, Kiet A. and Yu, Tianjiao and Wahed, Muntasir and Deng, Yinlin and Benkraouda, Hadjer and Wei, Yuxiang and Zhang, Lingming and Lourentzou, Ismini and Wang, Gang},
journal = {arXiv preprint arXiv:2507.19060},
year = {2025},
}
```
9 changes: 9 additions & 0 deletions eval/cweval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

# TODO(@zhewang2001): Please refactor the corresponding code snippets and then upload it.


def evaluate_cweval(*args):
    """Placeholder for the CWEval oracle (implementation not yet uploaded).

    Raises:
        NotImplementedError: always. A silent no-op here would make a
            `purpcode/CWEval` run look successful while evaluating nothing,
            so fail loudly until the real implementation lands.
    """
    raise NotImplementedError(
        "CWEval evaluation is not implemented yet; see the TODO in eval/cweval.py"
    )
9 changes: 9 additions & 0 deletions eval/cyberseceval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

# TODO(@zhewang2001): Please refactor the corresponding code snippets and then upload it.


def evaluate_cyberseceval(*args):
    """Placeholder for the CyberSecEval oracle (implementation not yet uploaded).

    Raises:
        NotImplementedError: always. A silent no-op here would make a
            `purpcode/CyberSecEval-*` run look successful while evaluating
            nothing, so fail loudly until the real implementation lands.
    """
    raise NotImplementedError(
        "CyberSecEval evaluation is not implemented yet; see the TODO in eval/cyberseceval.py"
    )
27 changes: 27 additions & 0 deletions eval/eval_only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

from eval.evaluate import DEFAULT_LLM_JUDGE, evaluate_main


def main(
    task: str,
    generation_path: str,
    oracle: str = None,
    llm_judge: str = DEFAULT_LLM_JUDGE,
    reference_results_path: str = None,
):
    """Evaluate a pre-existing generation file for *task* without regenerating.

    Thin CLI wrapper: all options are forwarded unchanged to
    ``eval.evaluate.evaluate_main``.
    """
    options = {
        "oracle": oracle,
        "llm_judge": llm_judge,
        "reference_results_path": reference_results_path,
    }
    evaluate_main(task, generation_path, **options)


if __name__ == "__main__":
from fire import Fire

Fire(main)
149 changes: 149 additions & 0 deletions eval/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# SPDX-FileCopyrightText: (c) UIUC PurpCode Team
#
# SPDX-License-Identifier: Apache-2.0

import json
import subprocess

from evalplus.sanitize import sanitize

from eval.generate import preprocess_generation

# TODO(@zhewang2001): allow users to play LLM judge based on vLLM, instead of relying on bedrock
DEFAULT_LLM_JUDGE = "bedrock/us.meta.llama3-3-70b-instruct-v1:0"


def to_evalplus_format(generation_path: str) -> str:
    """Convert a chat-style generation JSONL file into the EvalPlus sample format.

    Reads *generation_path* (one JSON object per line, each carrying a
    ``task_id`` and a ``messages`` list) and writes a sibling
    ``*.evalplus.jsonl`` file where each line holds the ``task_id`` plus the
    sanitized code extracted from the last message's content.

    Returns:
        Path of the written ``*.evalplus.jsonl`` file.

    Raises:
        ValueError: if *generation_path* does not end in ``.jsonl``. The old
            ``str.replace(".jsonl", ...)`` derivation would rewrite a
            mid-path occurrence of ".jsonl" and, when the suffix was absent,
            silently overwrite the input file.
    """
    suffix = ".jsonl"
    if not generation_path.endswith(suffix):
        raise ValueError(f"Expected a .jsonl generation file, got: {generation_path}")
    # Derive the output path strictly from the trailing suffix.
    evalplus_path = generation_path[: -len(suffix)] + ".evalplus.jsonl"

    with open(generation_path, "r") as f:
        data = [json.loads(line) for line in f]

    with open(evalplus_path, "w") as f:
        for item in data:
            record = {
                "task_id": item["task_id"],
                # sanitize() extracts the code solution from the raw response.
                "solution": sanitize(item["messages"][-1]["content"]),
            }
            f.write(json.dumps(record) + "\n")
    return evalplus_path


def evaluate_main(
    task: str,
    generation_path: str,
    oracle: str = None,
    llm_judge: str = None,
    reference_results_path: str = None,
):
    """Dispatch evaluation of *generation_path* to the oracle matching *task*.

    Args:
        task: Dataset identifier, e.g. ``"purpcode/CyberSecEval-SCG"``.
        generation_path: JSONL file of model generations to evaluate.
        oracle: Evaluation oracle to use; when ``None`` it is guessed from
            the ``task`` name.
        llm_judge: LLM-judge model id for judge-based oracles; falls back to
            ``DEFAULT_LLM_JUDGE`` when ``None``.
        reference_results_path: Optional reference results (used by the
            compound safety oracle).

    Raises:
        ValueError: when the oracle cannot be guessed from ``task``, or an
            explicitly given ``oracle`` is unknown.
    """
    if oracle is None:  # Guessing oracle
        print(f"Guessing oracle for task {task}...")
        if task in ["purpcode/mbppplus", "purpcode/humanevalplus"]:
            oracle = "evalplus"
        elif task.startswith("purpcode/CyberSecEval"):
            # Task names look like "purpcode/CyberSecEval-<FIELD>".
            field = task.split("/")[-1].split("-")[-1]
            if field == "SCG":
                oracle = "cyberseceval"
            elif field == "MITRE":
                oracle = "malevent"
            elif field == "FRR":
                oracle = "overrefusal"
            else:
                raise ValueError(f"Unknown dataset: {task}")
        elif task.startswith("purpcode/CodeLMSec"):
            oracle = "codeql"
        elif task.startswith("purpcode/XSCode"):
            oracle = "xscode"
        elif task.startswith("purpcode/XSTest"):
            oracle = "overrefusal"
        elif task.startswith("purpcode/PHTest"):
            oracle = "phtest"
        elif task.startswith("purpcode/CWEval"):
            oracle = "cweval"
        else:
            raise ValueError(f"Unknown oracle for {task = }. Please specify.")
        print(f"Guessed oracle: {oracle}")

    generation_path = preprocess_generation(generation_path)
    # Oracle-specific imports are deferred so one missing dependency does not
    # break evaluation paths that do not need it.
    if oracle == "evalplus":
        generation_path = to_evalplus_format(generation_path)
        dataset = task.split("/")[-1].removesuffix("plus")
        assert dataset in ["mbpp", "humaneval"]
        subprocess.run(
            ["evalplus.evaluate", "--dataset", dataset, "--samples", generation_path]
        )
    elif oracle == "safety":
        from eval.oracles.compound_safety_check import evaluate_compound_safety

        evaluate_compound_safety(
            task=task,
            generation_path=generation_path,
            model=llm_judge or DEFAULT_LLM_JUDGE,
            min_severity_level="MEDIUM",
            reference_results_path=reference_results_path,
            show_separate_stats=True,
        )
    elif oracle == "cyberseceval":
        from eval.cyberseceval import evaluate_cyberseceval

        evaluate_cyberseceval(generation_path=generation_path, task=task)
    elif oracle == "codeguru":
        from eval.oracles.secure_code_oracles import evaluate_secure_code_gen

        evaluate_secure_code_gen(
            generation_path=generation_path,
            min_severity_level="MEDIUM",
            analyzers=["codeguru"],
        )
    elif oracle == "codeql":
        from eval.oracles.secure_code_oracles import evaluate_secure_code_gen

        evaluate_secure_code_gen(
            generation_path=generation_path,
            min_severity_level="MEDIUM",
            analyzers=["codeql"],
        )
    elif oracle == "malevent":
        from eval.oracles.compound_safety_check import (
            evaluate_malicious_event_assistance,
        )

        evaluate_malicious_event_assistance(
            generation_path=generation_path,
            model=llm_judge or DEFAULT_LLM_JUDGE,
            task=task,
        )
    elif oracle == "overrefusal":
        from eval.oracles.check_secqa import evaluate_secqa_answers

        evaluate_secqa_answers(
            generation_path=generation_path, model=llm_judge or DEFAULT_LLM_JUDGE
        )
    elif oracle == "xscode":
        from eval.oracles.xscode_overrefuse import evaluate_xscode_answers

        # Honor a user-supplied --llm_judge here too; previously this branch
        # hard-coded DEFAULT_LLM_JUDGE, inconsistent with every other branch.
        evaluate_xscode_answers(
            generation_path=generation_path, model=llm_judge or DEFAULT_LLM_JUDGE
        )
    elif oracle == "phtest":
        from eval.phtest import evaluate_phtest

        evaluate_phtest(
            generation_path=generation_path, model=llm_judge or DEFAULT_LLM_JUDGE
        )
    elif oracle == "cweval":
        from eval.cweval import evaluate_cweval

        evaluate_cweval(generation_path=generation_path, task=task)
    else:
        raise ValueError(f"Unknown oracle: {oracle}")


if __name__ == "__main__":
from fire import Fire

Fire(evaluate_main)
Loading