From eb6da3ccfcbb7c7e42a4a3cbe953db25c509af08 Mon Sep 17 00:00:00 2001
From: Robert Jackson <rjackson@anlvpn205.evs.anl.gov>
Date: Thu, 23 Apr 2026 12:28:33 -0500
Subject: [PATCH] ADD: calculate_cohen_kappa utility and unit test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds calculate_cohen_kappa(df, label_col, pred_col) to lars/util/confusion_matrix.py
and exports it from lars/util/__init__.py.

Unit test asserts κ=5/11 (via pytest.approx) for the mock six-row CSV:
4 agreements give p_o=4/6, and p_e=7/18 because the true and predicted
labels share the same 3/6, 2/6, 1/6 marginals across three classes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 lars/util/__init__.py          |  2 +-
 lars/util/confusion_matrix.py  | 22 +++++++++++++++++++++-
 tests/test_confusion_matrix.py | 12 ++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/lars/util/__init__.py b/lars/util/__init__.py
index d007ce0..a4898db 100644
--- a/lars/util/__init__.py
+++ b/lars/util/__init__.py
@@ -1 +1 @@
-from .confusion_matrix import plot_confusion_matrix # noqa: F401
+from .confusion_matrix import plot_confusion_matrix, calculate_cohen_kappa # noqa: F401
diff --git a/lars/util/confusion_matrix.py b/lars/util/confusion_matrix.py
index c1fd867..c5ef756 100644
--- a/lars/util/confusion_matrix.py
+++ b/lars/util/confusion_matrix.py
@@ -1,5 +1,5 @@
 import matplotlib.pyplot as plt
-from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
+from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, cohen_kappa_score
 from sklearn.preprocessing import LabelEncoder
 
 def plot_confusion_matrix(df, label_col='label', pred_col='llm_label', normalize=None, ax=None):
@@ -30,3 +30,23 @@ def plot_confusion_matrix(df, label_col='label', pred_col='llm_label', normalize
     
     disp.plot(ax=ax, cmap=plt.cm.Blues)
     ax.set_title('Confusion Matrix')
+
+
+def calculate_cohen_kappa(df, label_col='label', pred_col='llm_label'):
+    """
+    Compute Cohen's kappa between two label columns, ignoring letter case.
+
+    Parameters
+    ----------
+    df (pd.DataFrame): DataFrame holding the true and predicted labels.
+    label_col (str): Name of the column with the true labels.
+    pred_col (str): Name of the column with the predicted labels.
+
+    Returns
+    -------
+    float: Cohen's kappa as returned by sklearn's cohen_kappa_score.
+    """
+    # Lowercase both columns so labels differing only in case still agree.
+    y_true = df[label_col].str.lower()
+    y_pred = df[pred_col].str.lower()
+    return cohen_kappa_score(y_true, y_pred)
diff --git a/tests/test_confusion_matrix.py b/tests/test_confusion_matrix.py
index 259ab9d..7e1a9e8 100644
--- a/tests/test_confusion_matrix.py
+++ b/tests/test_confusion_matrix.py
@@ -88,6 +88,18 @@ def test_confusion_matrix_counts(sample_df):
     np.testing.assert_array_equal(actual, expected)
 
 
+def test_cohen_kappa(sample_df):
+    """Cohen's kappa on the mock data: κ = (p_o - p_e) / (1 - p_e) = 5/11.
+
+    p_o = 4/6, since 4 of the 6 rows agree.
+    p_e = (3/6)^2 + (2/6)^2 + (1/6)^2 = 14/36 = 7/18.
+    κ   = (12/18 - 7/18) / (11/18) = 5/11.
+    """
+    from lars.util.confusion_matrix import calculate_cohen_kappa
+
+    assert calculate_cohen_kappa(sample_df) == pytest.approx(5 / 11)
+
+
 def test_normalized_confusion_matrix_values(sample_df):
     """Row-normalized values for each true class."""
     from lars.util.confusion_matrix import plot_confusion_matrix