-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpaper_stats.py
More file actions
126 lines (100 loc) · 3.96 KB
/
paper_stats.py
File metadata and controls
126 lines (100 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""Recompute the paper-level RigidBench v3.1 statistics.

Run from an extracted release directory:

    python paper_stats.py

The script also works from the FINAL paper directory if
``rigidbench_code_and_data.zip`` is present.
"""
from __future__ import annotations
import json
import math
import zipfile
from collections import Counter
from pathlib import Path
# Model identifiers. Each one names a ``results/<name>/`` directory, and
# result files are loaded in the order listed here.
MODEL_ORDER = [
    "gpt_55",
    "kimi_k2p6",
    "gemini_25_pro",
    "gemini_25_flash",
    "deepseek_v4",
    "claude_sonnet_46",
    "llama4_scout",
    "gpt_oss_120b",
    "grok_43",
]
def load_rows() -> list[dict]:
    """Load every model's result rows, model by model in ``MODEL_ORDER`` order.

    Rows come from ``results/<model>/rigidbench_v3_results.jsonl`` under the
    current directory when all of those files exist; otherwise they are read
    from the same members of ``rigidbench_code_and_data.zip``.

    Returns:
        One parsed JSON value per non-blank line of each results file.

    Raises:
        SystemExit: If neither the results files nor the archive are present,
            or the archive lacks a model's results file.
    """
    root = Path(".")
    relative_paths = [
        f"results/{name}/rigidbench_v3_results.jsonl" for name in MODEL_ORDER
    ]

    result_files = [root / relative for relative in relative_paths]
    if all(path.exists() for path in result_files):
        rows: list[dict] = []
        for path in result_files:
            # Iterate the file instead of str.splitlines(): splitlines() also
            # breaks on characters such as U+2028, which may appear unescaped
            # inside a JSON string and would cut a record in half.
            with path.open(encoding="utf-8") as fh:
                rows.extend(json.loads(line) for line in fh if line.strip())
        return rows

    zip_path = root / "rigidbench_code_and_data.zip"
    if not zip_path.exists():
        raise SystemExit("Could not find results/ or rigidbench_code_and_data.zip")

    rows = []
    with zipfile.ZipFile(zip_path) as zf:
        # Member names may be stored with "/" (the zip convention) or with
        # "\\" (some Windows tools), and zipfile only normalises "\\" when
        # reading on Windows. Index by the "/"-normalised name so both work.
        members = {info.filename.replace("\\", "/"): info for info in zf.infolist()}
        for relative in relative_paths:
            info = members.get(relative)
            if info is None:
                raise SystemExit(f"{zip_path} does not contain {relative}")
            with zf.open(info) as fh:
                rows.extend(
                    json.loads(line.decode("utf-8")) for line in fh if line.strip()
                )
    return rows
def chi_square_pressure(rows: list[dict]) -> tuple[float, float, float, list[list[int]]]:
    """Pearson chi-square test of SEM_SUB outcome against pressure level.

    Builds a 3x2 contingency table with one row per pressure level
    (``low``, ``mid``, ``high``) and columns ``[not SEM_SUB, SEM_SUB]``.
    Rows whose ``pressure_level`` is none of the three levels are ignored.

    Args:
        rows: Result records; ``pressure_level`` and ``error_type`` are read
            with ``dict.get``, so missing keys are tolerated.

    Returns:
        ``(chi2, p_value, cramers_v, table)``. With no usable rows the result
        is ``(0.0, 1.0, 0.0, table)`` with an all-zero table.

    Note:
        The p-value always uses df=2. If a pressure level or an outcome
        column is empty, its cells contribute nothing to the statistic; the
        df=2 p-value is then an upper bound on the reduced-df one.
    """
    table: list[list[int]] = []
    for level in ("low", "mid", "high"):
        items = [r for r in rows if r.get("pressure_level") == level]
        sem = sum(r.get("error_type") == "SEM_SUB" for r in items)
        table.append([len(items) - sem, sem])

    row_totals = [sum(row) for row in table]
    col_totals = [sum(column) for column in zip(*table)]
    n = sum(row_totals)
    if n == 0:
        # Nothing to test: report "no association" instead of dividing by zero.
        return 0.0, 1.0, 0.0, table

    chi2 = 0.0
    for row, row_total in zip(table, row_totals):
        for observed, col_total in zip(row, col_totals):
            expected = row_total * col_total / n
            # An empty row or column gives expected == 0 (and observed == 0);
            # skip the cell rather than evaluating 0 / 0.
            if expected > 0:
                chi2 += (observed - expected) ** 2 / expected

    # Survival function for chi-square with df=2 is exactly exp(-x / 2).
    p_value = math.exp(-chi2 / 2.0)
    # Cramer's V = sqrt(chi2 / (n * min(r - 1, c - 1))); min(2, 1) == 1 here.
    cramers_v = math.sqrt(chi2 / n)
    return chi2, p_value, cramers_v, table
def logistic_odds(rows: list[dict]) -> list[float]:
    """Fit a logistic regression for SEM_SUB and return its odds ratios.

    Each row contributes three features, in this order: the pressure level
    coded low=0 / mid=1 / high=2 (anything else is coded 1),
    ``semantic_sim_name_to_lure`` (missing or falsy -> 0.0) and
    ``phon_distance_name_to_neighbor`` (missing or falsy -> 3). The target is
    1 when ``error_type`` is ``"SEM_SUB"`` and 0 otherwise.

    Returns:
        ``exp(coefficient)`` for the three features, in feature order.

    Raises:
        SystemExit: If numpy or scikit-learn cannot be imported.
    """
    try:
        import numpy as np
        from sklearn.linear_model import LogisticRegression
    except ImportError as exc:
        raise SystemExit("Install scikit-learn to recompute logistic odds ratios") from exc

    level_codes = {"low": 0, "mid": 1, "high": 2}
    # NOTE(review): ``or 3`` also replaces a genuine distance of 0 with 3;
    # confirm that 0 never occurs in the data.
    features = [
        [
            level_codes.get(record.get("pressure_level"), 1),
            record.get("semantic_sim_name_to_lure", 0.0) or 0.0,
            record.get("phon_distance_name_to_neighbor", 3) or 3,
        ]
        for record in rows
    ]
    labels = [1 if record.get("error_type") == "SEM_SUB" else 0 for record in rows]

    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(np.array(features), np.array(labels))
    return [math.exp(weight) for weight in classifier.coef_[0]]
def main() -> None:
    """Load all result rows and print the paper-level summary statistics.

    Prints, in order: the total row count, the count of each ``error_type``,
    the SEM_SUB rate per pressure level, the pressure-by-outcome chi-square
    test, and the logistic-regression odds ratios.
    """
    rows = load_rows()
    counts = Counter(row.get("error_type") for row in rows)
    print(f"Total rows: {len(rows)}")
    print(f"Outcome counts: {dict(counts)}")

    for level in ["low", "mid", "high"]:
        items = [row for row in rows if row.get("pressure_level") == level]
        sem = sum(row.get("error_type") == "SEM_SUB" for row in items)
        # A level with no rows has no defined rate; avoid dividing by zero.
        rate = f"{sem / len(items):.4f}" if items else "n/a"
        print(f"{level}: {sem}/{len(items)} = {rate}")

    chi2, p_value, cramers_v, table = chi_square_pressure(rows)
    print(f"Pressure table [[no_sub, sem_sub], ...]: {table}")
    print(f"chi2={chi2:.2f}, df=2, p={p_value:.2e}, Cramer's V={cramers_v:.2f}")

    odds = logistic_odds(rows)
    print(
        "Regularized logistic odds ratios: "
        f"pressure={odds[0]:.2f}, semantic_similarity={odds[1]:.2f}, "
        f"phonological_distance={odds[2]:.2f}"
    )


if __name__ == "__main__":
    main()