From eb6da3ccfcbb7c7e42a4a3cbe953db25c509af08 Mon Sep 17 00:00:00 2001 From: Robert Jackson Date: Thu, 23 Apr 2026 12:28:33 -0500 Subject: [PATCH] ADD: calculate_cohen_kappa utility and unit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds calculate_cohen_kappa(df, label_col, pred_col) to lars/util/confusion_matrix.py and exports it from lars/util/__init__.py. Unit test asserts the analytically derived value κ=5/11 (compared via pytest.approx) for the mock six-row CSV (4 agreements, so p_o=4/6; p_e=7/18 because the true and predicted labels have identical marginals of 3/6, 2/6 and 1/6 across the three classes). Co-Authored-By: Claude Sonnet 4.6 --- lars/util/__init__.py | 2 +- lars/util/confusion_matrix.py | 22 +++++++++++++++++++++- tests/test_confusion_matrix.py | 12 ++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/lars/util/__init__.py b/lars/util/__init__.py index d007ce0..a4898db 100644 --- a/lars/util/__init__.py +++ b/lars/util/__init__.py @@ -1 +1 @@ -from .confusion_matrix import plot_confusion_matrix # noqa: F401 +from .confusion_matrix import plot_confusion_matrix, calculate_cohen_kappa # noqa: F401 diff --git a/lars/util/confusion_matrix.py b/lars/util/confusion_matrix.py index c1fd867..c5ef756 100644 --- a/lars/util/confusion_matrix.py +++ b/lars/util/confusion_matrix.py @@ -1,5 +1,5 @@ import matplotlib.pyplot as plt -from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay +from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, cohen_kappa_score from sklearn.preprocessing import LabelEncoder def plot_confusion_matrix(df, label_col='label', pred_col='llm_label', normalize=None, ax=None): @@ -30,3 +30,23 @@ def plot_confusion_matrix(df, label_col='label', pred_col='llm_label', normalize disp.plot(ax=ax, cmap=plt.cm.Blues) ax.set_title('Confusion Matrix') + + +def calculate_cohen_kappa(df, label_col='label', pred_col='llm_label'): + """ + Calculate Cohen's kappa from true and predicted labels in a DataFrame.
+ + Parameters + ---------- + df (pd.DataFrame): DataFrame containing true and predicted labels. + label_col (str): Column name for true labels. + pred_col (str): Column name for predicted labels. + + Returns + ------- + float: Cohen's kappa coefficient. + """ + return cohen_kappa_score( + df[label_col].str.lower(), + df[pred_col].str.lower(), + ) diff --git a/tests/test_confusion_matrix.py b/tests/test_confusion_matrix.py index 259ab9d..7e1a9e8 100644 --- a/tests/test_confusion_matrix.py +++ b/tests/test_confusion_matrix.py @@ -88,6 +88,18 @@ def test_confusion_matrix_counts(sample_df): np.testing.assert_array_equal(actual, expected) +def test_cohen_kappa(sample_df): + """κ = (p_o - p_e) / (1 - p_e) + p_o = 4/6 (4 agreements out of 6 rows) + p_e = (3/6)^2 + (2/6)^2 + (1/6)^2 = 14/36 = 7/18 + κ = (12/18 - 7/18) / (11/18) = 5/11 + """ + from lars.util.confusion_matrix import calculate_cohen_kappa + + kappa = calculate_cohen_kappa(sample_df) + assert kappa == pytest.approx(5 / 11) + + def test_normalized_confusion_matrix_values(sample_df): """Row-normalized values for each true class.""" from lars.util.confusion_matrix import plot_confusion_matrix