feat(eval): codeguru (#13)

nirav0999 · Copilot · web-flow · commit 73cbe3e69117 · 2025-08-09T04:20:15.000-05:00
* feat(eval): codeguru

* Update eval/oracles/codeguru_oracle.py

Co-authored-by: Copilot &lt;175728472+Copilot@users.noreply.github.com&gt;

* chore: fix gemini comments

---------

Co-authored-by: Copilot &lt;175728472+Copilot@users.noreply.github.com&gt;
diff --git a/eval/oracles/codeguru_oracle.py b/eval/oracles/codeguru_oracle.py
@@ -2,4 +2,240 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# TODO(@nirav0999): Please refactor the corresponding code snippets and then upload it.
+import json
+import os
+import subprocess
+import sys
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Dict, List
+
+import rich
+
+from eval.oracles.secure_code_oracles_utils import (
+    check_min_severity,
+    decode_base64,
+    encode_base64,
+    get_aws_region,
+    zip_files_flat,
+)
+
+
+def run_codeguru(
+    zip_filepath,
+    output_filepath: str,
+    scan_name: str = "",
+    region: str = "",
+    verbose: bool = False,
+):
+
+    region = region or get_aws_region()
+    scan_name = scan_name or str(uuid.uuid4())
+    failure_detected = False  # Flag to track if specific failure was found
+
+    command = [
+        "bash",
+        "eval/oracles/run_codeguru_security.sh",
+        f"{scan_name}",
+        f"{zip_filepath}",
+        f"{region}",
+        f"{output_filepath}",
+    ]
+
+    print(f"Running CodeGuru scan: {scan_name} for {zip_filepath}")  # Added info
+
+    with subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,  # Capture stderr as well for potential errors
+        text=True,
+        bufsize=1,
+        universal_newlines=True,
+    ) as process:
+        stdout_lines = []
+        stderr_lines = []
+
+        # Process stdout
+        if process.stdout:
+            for line in process.stdout:
+                stdout_lines.append(line)
+                if verbose:
+                    sys.stdout.write(line)
+                    sys.stdout.flush()
+                # --- Check for specific failure string ---
+                if "current scanstate: failed" in line.lower():
+                    failure_detected = True
+                    print(
+                        f"\nERROR: Detected 'current scanState: failed' in output for scan: {scan_name}",
+                        file=sys.stderr,
+                    )
+                    process.terminate()
+                    try:
+                        process.wait(timeout=5)
+                    except subprocess.TimeoutExpired:
+                        print(
+                            "Warning: Process did not terminate gracefully, killing.",
+                            file=sys.stderr,
+                        )
+                        process.kill()
+                    # Raise the specific error immediately
+                    raise RuntimeError(
+                        f"CodeGuru scan '{scan_name}' reported failure state. Output: {output_filepath}"
+                    )
+
+        # Capture stderr separately
+        if process.stderr:
+            stderr_lines = list(process.stderr)
+            if verbose and stderr_lines:
+                print("\n--- Subprocess Stderr ---", file=sys.stderr)
+                for err_line in stderr_lines:
+                    sys.stderr.write(err_line)
+                print("--- End Subprocess Stderr ---", file=sys.stderr)
+
+        process.wait()
+
+        # --- Check return code only if specific failure wasn't detected ---
+        if not failure_detected:
+            if process.returncode == 0:
+                rich.print(
+                    f"[green]CodeGuru analysis '{scan_name}' completed successfully. Output: {output_filepath}"
+                )
+            else:
+                full_stderr = "".join(stderr_lines)
+                raise RuntimeError(
+                    f"CodeGuru analysis '{scan_name}' failed with return code {process.returncode}. Output: {output_filepath}\nStderr:\n{full_stderr}"
+                )
+
+
+def execute_codeguru(samples: List[Dict], output_dir: str) -> Dict:
+    os.makedirs(output_dir, exist_ok=True)
+    temp_files = []
+    num_snippets = 0
+
+    with tempfile.TemporaryDirectory(dir=output_dir) as temp_dir:
+        temp_zip_filepath = f"{output_dir}/temp_codeguru.zip"
+        for sample in samples:
+            if not sample["code_blocks"]:
+                continue
+
+            for i, code in enumerate(sample["code_blocks"]):
+                # Encoding the file with reversible one-to-one base64 mapping
+                # Necessary to because CodeGuru has special rules for file names (< 96 characters, should not have special characters, no test files)
+                filename = (
+                    encode_base64(f"{sample['task_id']}--{sample['turn']}--{i}") + ".py"
+                )
+                file_path = os.path.join(temp_dir, filename)
+                num_snippets += 1
+
+                with open(file_path, "w") as f:
+                    f.write(code)
+
+                temp_files.append(file_path)
+
+        assert num_snippets == len(temp_files)
+
+        rich.print("[red] Preparing zip file ....")
+        zip_files_flat(temp_files, temp_zip_filepath)
+
+        rich.print("[red] Running codeguru using aws cli ...")
+        run_codeguru(temp_zip_filepath, f"{output_dir}/recommendations.json")
+
+        with open(f"{output_dir}/recommendations.json", "r") as f:
+            results = json.load(f)
+
+    return results
+
+
+def parse_and_filter_codeguru_results(
+    results: Dict, min_severity_level: str = "MEDIUM"
+) -> Dict:
+
+    vuln_tasks = {}
+
+    if not results or not isinstance(results, dict):
+        return vuln_tasks
+
+    findings = results.get("findings", [])
+    if not findings:
+        return vuln_tasks
+
+    for finding in findings:
+        try:
+            vulnerability = finding.get("vulnerability", {})
+            file_path_info = vulnerability.get("filePath", {})
+            remediation = finding.get("remediation", {})
+            recommendation = remediation.get("recommendation", {})
+
+            path = Path(vulnerability.get("filePath", {}).get("path", ""))
+            if not path.name:
+                continue
+
+            filename = decode_base64(path.stem)
+            filename_parts = filename.split("--")
+
+            if len(filename_parts) < 3:
+                continue
+
+            task_id, turn_id, snippet_id = (
+                "--".join(filename_parts[:-2]),
+                filename_parts[-2],
+                filename_parts[-1],
+            )
+
+            parsed_finding = {
+                "id": finding.get("id"),
+                "description": finding.get("description"),
+                "severity": finding.get("severity"),
+                "status": finding.get("status"),
+                "detector_name": finding.get("detectorName"),
+                "detector_tags": finding.get("detectorTags", []),
+                "rule_id": finding.get("ruleId"),
+                "title": finding.get("title"),
+                "file_path": file_path_info.get("path"),
+                "start_line": file_path_info.get("startLine"),
+                "end_line": file_path_info.get("endLine"),
+                "code_snippet": [
+                    {"line": snip.get("number"), "content": snip.get("content")}
+                    for snip in file_path_info.get("codeSnippet", [])
+                ],
+                "recommendation_text": recommendation.get("text"),
+                "recommendation_url": recommendation.get("url"),
+                "related_vulnerabilities": vulnerability.get(
+                    "relatedVulnerabilities", []
+                ),
+                "created_at": finding.get("createdAt"),
+                "updated_at": finding.get("updatedAt"),
+            }
+
+            vul_sample = {
+                "task_id": task_id,
+                "turn": int(turn_id),
+                "snippet_id": snippet_id,
+                "analyzer": "codeguru",
+                "vulnerable": True,
+                "filename": filename,
+                "start_line": parsed_finding["start_line"],
+                "end_line": parsed_finding["end_line"],
+                "cwes": parsed_finding["related_vulnerabilities"],
+                "severity": finding["severity"],
+                "misc": {
+                    "finding": parsed_finding,
+                    "severity": finding["severity"],
+                },
+            }
+
+            if not check_min_severity(
+                vul_sample["misc"]["severity"], min_severity_level
+            ):
+                continue
+
+            vuln_tasks.setdefault(task_id, []).append(vul_sample)
+
+        except Exception as e:
+            print(
+                f"Warning: Failed to parse finding {finding.get('id', 'unknown')}: {e}"
+            )
+            continue
+
+    return vuln_tasks
diff --git a/eval/oracles/run_codeguru_security.sh b/eval/oracles/run_codeguru_security.sh
@@ -2,4 +2,108 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# TODO(@nirav0999): Please refactor the corresponding code snippets and then upload it.
+# prereq:
+# 1. Install jq
+# 2. Install aws cli
+
+# Add model: aws configure add-model --service-model file://./codegurureviewerv2-2018-05-10.normal.json --service-name codeguru-security
+# install brew: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"
+# install jq using: brew install jq
+
+set -e # exit on first error
+
+# Run script ./run_codeguru_security.sh MyScan upload_folder/zipFile us-east-1, region is optional
+
+scanName="$1"
+fileOrFolder="$2"
+region="$3"
+outputFile="$4"
+
+die() { echo "$*" 1>&2 ; exit 1; }
+
+
+zipName="/tmp/$(date +%s).zip"
+
+[ "$#" -eq 4 ] || die "4 arguments required. Usage: $0 <scanName> <fileOrFolder> <region> <outputFile>"
+
+echo "$fileOrFolder"
+
+if [ ! -d "$fileOrFolder" ] && [ ! -f "$fileOrFolder" ]; then
+        die "file or folder doesn't exist"
+fi
+if [ -d "$fileOrFolder" ]; then
+  zipName="/tmp/$(date +%s).zip"
+  zip -r "$zipName" "$fileOrFolder"
+else
+  zipName=$fileOrFolder
+fi
+
+if [[ -z "$region" ]]; then
+  region=$(aws configure get region)
+fi
+
+if [[ -z "$region" ]]; then
+  die "no region provided in script and no default region is present aws configuration"
+fi
+
+
+createuploadcommand="aws codeguru-security create-upload-url --region $region --scan-name=$scanName"
+echo -e "Uploading content\n"
+echo $createuploadcommand
+
+uploadUrl=$(eval $createuploadcommand)
+
+echo $uploadUrl
+
+### Extracting variables
+s3Url=$(echo $uploadUrl | jq '.s3Url')
+requestHeaders=$(echo $uploadUrl | jq -r '.requestHeaders| to_entries | map("-H \""+ (.key) + ":" + (.value|tostring) + "\"")| join(" ")' )
+codeArtifactId=$(echo $uploadUrl | jq '.codeArtifactId')
+
+uploadContentCommand="curl -X PUT -T $zipName -H \"Content-Type: application/zip\" $requestHeaders $s3Url"
+
+
+echo "Uploading content by running following command.\n"
+echo $uploadContentCommand
+
+eval $uploadContentCommand
+
+createScanCommand="aws codeguru-security create-scan --region $region --scan-name=$scanName --resource-id '{\"codeArtifactId\": $codeArtifactId}'"
+
+echo -e "creating a scan \n"
+
+echo $createScanCommand
+
+scan=$(eval $createScanCommand)
+
+
+runId=$(echo $scan | jq '.runId')
+
+
+echo $scan
+
+scanState="InProgress"
+getCommand="aws codeguru-security get-scan --region $region --scan-name=$scanName --run-id=$runId"
+
+inprogress="InProgress"
+while [ $scanState = $inprogress ]
+do
+    echo "Running Get to check if status is completed"
+
+    echo $getCommand
+
+    getscanOut=$(eval $getCommand)
+    scanState=$(echo $getscanOut | jq '.scanState' | tr -d '"')
+
+    echo "Current scanState: $scanState, expected $inprogress"
+
+    sleep 10
+done
+
+getFindingsCommand="aws codeguru-security get-findings --region $region --scan-name=$scanName --output json | tee $outputFile"
+
+echo $getFindingsCommand
+
+eval $getFindingsCommand
+
+echo "Findings written to $outputFile"