-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_distributions.py
More file actions
53 lines (45 loc) · 2.53 KB
/
Copy pathplot_distributions.py
File metadata and controls
53 lines (45 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import json
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
# Flat analysis script: read a JSONL results file, collect one score and one
# prefix-adjusted prompt length per "vector_scores" entry, plot both
# distributions side by side, save the figure, and print summary statistics.

# Parallel lists filled while reading the JSONL file.
scores = []
prompt_lengths = []

# Resolve paths relative to the repository so the project is self-contained.
_REPO_ROOT = Path(__file__).resolve().parent.parent
_DATA_PATH = _REPO_ROOT / "dataset_build_n_eval" / "data" / "Datasets" / "HeCBench" / "redo_output_a3_full.jsonl"

# Fixed instruction text whose length is subtracted from every prompt length.
# Only the length is used; prompts are not checked to actually start with it.
_PROMPT_PREFIX = (
    "You are an HPC expert specializing in translating between parallel programming APIs.\n"
    "For each kernel code provided, translate it from serial to cuda. "
    "Provide the complete code in cuda. "
    "Do not truncate or use ellipses. "
    "Do not change the main function. "
    "Ensure correctness. "
    "All function names must match. "
    "The code to translate: // File:"
)
_PROMPT_PREFIX_LEN = len(_PROMPT_PREFIX)  # measured once instead of per entry

# Output location, relative to the current working directory. The spelling
# "dstribution" is kept as-is so the output filename does not change.
_FIGURE_FILE = "figures/train_code_a3_dstribution_no_llmjudge.png"

# JSONL is read as UTF-8 explicitly so decoding does not depend on the
# platform's default locale encoding.
with open(_DATA_PATH, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # skip blank lines
        record = json.loads(line)
        for vs in record["vector_scores"]:
            scores.append(vs["score"])
            # NOTE(review): the extracted source lost its indentation; this
            # append is assumed to sit inside the inner loop so that `scores`
            # and `prompt_lengths` stay the same length -- confirm against
            # the original file.
            prompt_lengths.append(len(record["prompt"]) - _PROMPT_PREFIX_LEN)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Only draw when there is data: mean/median of an empty list are undefined.
if scores:
    # Score distribution
    score_mean = np.mean(scores)
    score_median = np.median(scores)
    axes[0].hist(scores, bins=50, edgecolor="black", color="steelblue")
    axes[0].set_title("Distribution of vector_scores score", fontsize=14)
    axes[0].set_xlabel("score")
    axes[0].set_ylabel("Count")
    axes[0].axvline(score_mean, color="red", linestyle="--", label=f"Mean: {score_mean:.4f}")
    axes[0].axvline(score_median, color="orange", linestyle="--", label=f"Median: {score_median:.4f}")
    # Ticks every 0.05 from 0 to 1 -- presumably scores lie in [0, 1]; verify.
    axes[0].set_xticks(np.arange(0, 1.05, 0.05))
    axes[0].tick_params(axis="x", rotation=45)
    axes[0].legend()

    # Prompt length distribution
    length_mean = np.mean(prompt_lengths)
    length_median = np.median(prompt_lengths)
    axes[1].hist(prompt_lengths, bins=50, edgecolor="black", color="seagreen")
    axes[1].set_title("Distribution of Prompt Lengths (chars)", fontsize=14)
    axes[1].set_xlabel("Prompt length (characters)")
    axes[1].set_ylabel("Count")
    axes[1].axvline(length_mean, color="red", linestyle="--", label=f"Mean: {length_mean:.0f}")
    axes[1].axvline(length_median, color="orange", linestyle="--", label=f"Median: {length_median:.0f}")
    axes[1].legend()

plt.tight_layout()
# savefig does not create missing directories, so make sure the target
# directory exists before writing the figure.
Path(_FIGURE_FILE).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(_FIGURE_FILE, dpi=150)
plt.show()

# The count printed here is the number of collected scores.
print(f"Total records: {len(scores)}")
if scores:
    print(f"Score — min: {min(scores):.4f}, max: {max(scores):.4f}, mean: {np.mean(scores):.4f}, std: {np.std(scores):.4f}")
    print(f"Prompt — min: {min(prompt_lengths)}, max: {max(prompt_lengths)}, mean: {np.mean(prompt_lengths):.0f}, std: {np.std(prompt_lengths):.0f}")