# Source: Parallax/PRM/plot_distributions.py (Scientific-Computing-Lab/Parallax, branch "main")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

# Accumulators filled while streaming the JSONL file:
#   scores         - one entry per element of each record's "vector_scores" list
#   prompt_lengths - one entry per record (prompt length minus the fixed prefix)
scores = []
prompt_lengths = []

# Resolve paths relative to the repository so the project is self-contained.
_REPO_ROOT = Path(__file__).resolve().parent.parent
_DATA_PATH = _REPO_ROOT / "dataset_build_n_eval" / "data" / "Datasets" / "HeCBench" / "redo_output_a3_full.jsonl"

# Fixed instruction text whose length is subtracted from every prompt length.
# The subtraction is unconditional; this presumably assumes every prompt starts
# with exactly this text -- TODO confirm against the dataset builder.
_PROMPT_PREFIX = (
    "You are an HPC expert specializing in translating between parallel programming APIs.\n"
    "For each kernel code provided, translate it from serial to cuda. "
    "Provide the complete code in cuda. "
    "Do not truncate or use ellipses. "
    "Do not change the main function. "
    "Ensure correctness. "
    "All function names must match. "
    "The code to translate: // File:"
)
# Measured once instead of once per record.
_PROMPT_PREFIX_LEN = len(_PROMPT_PREFIX)

# Decode explicitly as UTF-8 so the character counts do not depend on the
# platform's default locale encoding.
with open(_DATA_PATH, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the file).
            continue
        record = json.loads(line)
        scores.extend(vs["score"] for vs in record["vector_scores"])
        prompt_lengths.append(len(record["prompt"]) - _PROMPT_PREFIX_LEN)

def _mark_mean_and_median(ax, values, fmt):
    """Draw dashed mean (red) and median (orange) lines on *ax* and add a legend.

    *fmt* is the format spec (e.g. ".4f") used for the numbers in the legend labels.
    """
    mean_value = np.mean(values)
    median_value = np.median(values)
    ax.axvline(mean_value, color="red", linestyle="--", label=f"Mean: {mean_value:{fmt}}")
    ax.axvline(median_value, color="orange", linestyle="--", label=f"Median: {median_value:{fmt}}")
    ax.legend()


# Two side-by-side panels: score histogram (left), prompt-length histogram (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

if scores:
    # Score distribution
    axes[0].hist(scores, bins=50, edgecolor="black", color="steelblue")
    axes[0].set_title("Distribution of vector_scores score", fontsize=14)
    axes[0].set_xlabel("score")
    axes[0].set_ylabel("Count")
    # Ticks every 0.05 from 0 to 1, rotated so the labels do not overlap.
    axes[0].set_xticks(np.arange(0, 1.05, 0.05))
    axes[0].tick_params(axis="x", rotation=45)
    _mark_mean_and_median(axes[0], scores, ".4f")

# Guarded like the score panel: the mean/median of an empty list is NaN.
if prompt_lengths:
    # Prompt length distribution
    axes[1].hist(prompt_lengths, bins=50, edgecolor="black", color="seagreen")
    axes[1].set_title("Distribution of Prompt Lengths (chars)", fontsize=14)
    axes[1].set_xlabel("Prompt length (characters)")
    axes[1].set_ylabel("Count")
    _mark_mean_and_median(axes[1], prompt_lengths, ".0f")

plt.tight_layout()

# The output path is relative to the current working directory. Create the
# directory first so savefig does not fail when "figures/" does not exist yet.
# The file name is kept exactly as it was so existing references stay valid.
_FIGURE_PATH = Path("figures") / "train_code_a3_dstribution_no_llmjudge.png"
_FIGURE_PATH.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(_FIGURE_PATH, dpi=150)
plt.show()

# Summary statistics on stdout.
# prompt_lengths holds one entry per record, while scores holds one entry per
# "vector_scores" element, so the record count comes from prompt_lengths.
print(f"Total records: {len(prompt_lengths)}")
print(f"Total scores: {len(scores)}")
if scores:
    print(f"Score  — min: {min(scores):.4f}, max: {max(scores):.4f}, mean: {np.mean(scores):.4f}, std: {np.std(scores):.4f}")
# min()/max() raise ValueError on an empty list, so guard this line as well.
if prompt_lengths:
    print(f"Prompt — min: {min(prompt_lengths)}, max: {max(prompt_lengths)}, mean: {np.mean(prompt_lengths):.0f}, std: {np.std(prompt_lengths):.0f}")