Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- `src/fairness.py` with `fairness_metrics` and `decision_disagreement` helpers
for demographic-parity / equal-opportunity gaps and receiver disagreement.
- Apache 2.0 `LICENSE` + `NOTICE` with third-party data attribution (German
Credit / OpenML `credit-g`, Chiappa 2019 DAG).
- `data/README.md` documenting dataset provenance and the variable mapping.
Expand All @@ -19,8 +21,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `pyproject.toml` packaging the engine with pinned dependencies, a `viz` extra
for the plotting scripts, and Black/Ruff/mypy/pytest/coverage configuration.
- `CITATION.cff` for software + the accompanying (forthcoming) paper.
- `tests/` pytest suite covering `distances`, `linear_anm`, `perception`, and
`data_prep` (with the OpenML fetch mocked — no network access required).
- `tests/` pytest suite covering `distances`, `linear_anm`, `perception`,
`data_prep`, and `fairness` (with the OpenML fetch mocked — no network access
required).
- SPDX headers (`Copyright (c) 2026 José M. Álvarez` / `Apache-2.0`) on all
Python source files.
- GitHub Actions workflows (third-party actions pinned to SHA digests):
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ src/
linear_anm.py # Linear ANM: DAGs, fit, do-operator, counterfactuals
distances.py # W2, KL (KDE), Total Variation between 1-D samples
perception.py # competing-SCM engine + bootstrap CIs
fairness.py # DP/EO gaps and decision disagreement metrics
run_*.py # experiment entrypoints (require the `viz` extra)
plot_structural_combined.py
tests/ # pytest suite (OpenML fetch mocked — no network)
Expand Down
99 changes: 99 additions & 0 deletions src/fairness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copyright (c) 2026 José M. Álvarez
# SPDX-License-Identifier: Apache-2.0

"""
Fairness metrics for credit decision experiments.

Provides reusable functions for demographic-parity gaps, equal-opportunity
gaps, and decision disagreement between competing receivers.
"""

import numpy as np


def fairness_metrics(y_true, y_hat, a, tau=0.5):
    """
    Compute fairness metrics for a single receiver.

    Inputs are coerced with ``np.asarray`` so plain lists work as well as
    NumPy arrays. A rate over an empty subgroup is reported as NaN (and every
    gap derived from it is NaN too).

    Parameters
    ----------
    y_true : array-like
        Ground-truth outcome labels (1 = positive class, 0 = negative class).
    y_hat : array-like
        Predicted risk scores.
    a : array-like
        Protected attribute (0 = female, 1 = male).
    tau : float
        Decision threshold; accept when y_hat >= tau.

    Returns
    -------
    dict
        ``accept_all``, ``accept_female``, ``accept_male``: acceptance rates.
        ``dp_gap``: female minus male acceptance rate.
        ``tpr_female``, ``tpr_male``, ``tpr_gap``: acceptance rate among
        Y=1 rows per group, and the female minus male difference.
        ``fpr_female``, ``fpr_male``, ``fpr_gap``: acceptance rate among
        Y=0 rows per group, and the female minus male difference.
    """
    # Coerce up front: comparing a plain list against a scalar either raises
    # (``>=``) or collapses to a single bool (``==``) instead of a mask.
    y_true = np.asarray(y_true)
    y_hat = np.asarray(y_hat)
    a = np.asarray(a)

    d = (y_hat >= tau).astype(int)
    female = a == 0
    male = a == 1
    positive = y_true == 1
    negative = y_true == 0

    def _rate(decisions):
        # An empty subgroup has no defined rate; return NaN directly rather
        # than letting np.mean warn about an empty slice.
        if decisions.size == 0:
            return np.float64(np.nan)
        return np.mean(decisions)

    accept_f = _rate(d[female])
    accept_m = _rate(d[male])
    accept_all = _rate(d)
    dp_gap = accept_f - accept_m  # signed: positive = females accepted more

    # TPR = P(D=1 | Y=1, A=a)
    tpr_f = _rate(d[female & positive])
    tpr_m = _rate(d[male & positive])
    # FPR = P(D=1 | Y=0, A=a)
    fpr_f = _rate(d[female & negative])
    fpr_m = _rate(d[male & negative])

    return {
        "accept_all": accept_all,
        "accept_female": accept_f,
        "accept_male": accept_m,
        "dp_gap": dp_gap,
        "tpr_female": tpr_f,
        "tpr_male": tpr_m,
        "tpr_gap": tpr_f - tpr_m,
        "fpr_female": fpr_f,
        "fpr_male": fpr_m,
        "fpr_gap": fpr_f - fpr_m,
    }


def decision_disagreement(y_hat_a, y_hat_b, a, tau=0.5):
    """
    Compute decision disagreement between two receivers.

    Inputs are coerced with ``np.asarray`` so plain lists work as well as
    NumPy arrays. A disagreement rate over an empty group is reported as NaN;
    the matching count is 0.

    Parameters
    ----------
    y_hat_a, y_hat_b : array-like
        Predicted risk scores from receiver A and B.
    a : array-like
        Protected attribute (0 = female, 1 = male).
    tau : float
        Decision threshold; accept when y_hat >= tau. Defaults to 0.5, the
        same default as ``fairness_metrics``.

    Returns
    -------
    dict
        ``disagree_rate``, ``disagree_female``, ``disagree_male``: share of
        rows where the two thresholded decisions differ (overall / per group).
        ``n_disagree``, ``n_disagree_female``, ``n_disagree_male``: the
        corresponding counts.
        ``r1_grant_r2_deny``, ``r1_deny_r2_grant``: directional flip counts,
        where receiver A is "r1" and receiver B is "r2".
    """
    # Coerce up front: comparing a plain list against a scalar either raises
    # (``>=``) or collapses to a single bool (``==``) instead of a mask.
    y_hat_a = np.asarray(y_hat_a)
    y_hat_b = np.asarray(y_hat_b)
    a = np.asarray(a)

    d_a = (y_hat_a >= tau).astype(int)
    d_b = (y_hat_b >= tau).astype(int)
    disagree = d_a != d_b

    disagree_f = disagree[a == 0]
    disagree_m = disagree[a == 1]

    def _rate(flags):
        # An empty group has no defined rate; return NaN directly rather
        # than letting np.mean warn about an empty slice.
        if flags.size == 0:
            return np.float64(np.nan)
        return np.mean(flags)

    r1_grant_r2_deny = (d_a == 1) & (d_b == 0)
    r1_deny_r2_grant = (d_a == 0) & (d_b == 1)

    return {
        "disagree_rate": _rate(disagree),
        "disagree_female": _rate(disagree_f),
        "disagree_male": _rate(disagree_m),
        "n_disagree": int(disagree.sum()),
        "n_disagree_female": int(disagree_f.sum()),
        "n_disagree_male": int(disagree_m.sum()),
        "r1_grant_r2_deny": int(r1_grant_r2_deny.sum()),
        "r1_deny_r2_grant": int(r1_deny_r2_grant.sum()),
    }
61 changes: 10 additions & 51 deletions src/run_fair_decisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,15 @@
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import auc, average_precision_score, precision_recall_curve, roc_curve

from src.data_prep import load_data
from src.fairness import decision_disagreement, fairness_metrics
from src.linear_anm import CHIAPPA_FULL, CHIAPPA_NO_AY
from src.perception import fit_scms


def fairness_metrics(y_true, y_hat, a, tau=0.5):
    """Compute fairness metrics for a single receiver.

    Inputs are coerced with ``np.asarray`` so plain lists work as well as
    NumPy arrays. Decisions are ``y_hat >= tau``; group 0 of ``a`` is reported
    as "female" and group 1 as "male". A rate over an empty subgroup is NaN.

    Returns dict with acceptance rates, DP gap, TPR/FPR by group, EO gaps.
    All gaps are signed female minus male.
    """
    # Coerce up front: comparing a plain list against a scalar either raises
    # (``>=``) or collapses to a single bool (``==``) instead of a mask.
    y_true, y_hat, a = np.asarray(y_true), np.asarray(y_hat), np.asarray(a)

    d = (y_hat >= tau).astype(int)
    female = a == 0
    male = a == 1

    def _rate(mask=None):
        # Mean decision over the selected rows; NaN (without np.mean's
        # empty-slice warning) when the selection is empty.
        chosen = d if mask is None else d[mask]
        return np.mean(chosen) if chosen.size else np.float64(np.nan)

    accept_f = _rate(female)
    accept_m = _rate(male)
    accept_all = _rate()
    dp_gap = accept_f - accept_m  # signed: positive = females accepted more

    # TPR = P(D=1 | Y=1, A=a)
    tpr_f = _rate(female & (y_true == 1))
    tpr_m = _rate(male & (y_true == 1))
    # FPR = P(D=1 | Y=0, A=a)
    fpr_f = _rate(female & (y_true == 0))
    fpr_m = _rate(male & (y_true == 0))

    return {
        "accept_all": accept_all,
        "accept_female": accept_f,
        "accept_male": accept_m,
        "dp_gap": dp_gap,
        "tpr_female": tpr_f,
        "tpr_male": tpr_m,
        "tpr_gap": tpr_f - tpr_m,
        "fpr_female": fpr_f,
        "fpr_male": fpr_m,
        "fpr_gap": fpr_f - fpr_m,
    }


def main():
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
train, test = load_data()
Expand Down Expand Up @@ -137,24 +102,18 @@ def main():
)

# Decision disagreement
d_r1 = (y_hat_r1 >= tau).astype(int)
d_r2 = (y_hat_r2 >= tau).astype(int)
disagree = d_r1 != d_r2
disagree_rate = np.mean(disagree)
disagree_f = np.mean(disagree[a == 0])
disagree_m = np.mean(disagree[a == 1])
n_disagree = int(disagree.sum())
dd = decision_disagreement(y_hat_r1, y_hat_r2, a, tau)
disagree_rate = dd["disagree_rate"]
disagree_f = dd["disagree_female"]
disagree_m = dd["disagree_male"]
n_disagree = dd["n_disagree"]

print("\nDecision disagreement (R1 vs R2):")
print(f" Overall: {disagree_rate:.3f} ({n_disagree} / {len(test)} applicants)")
print(f" Female: {disagree_f:.3f} ({int(disagree[a==0].sum())} / {(a==0).sum()})")
print(f" Male: {disagree_m:.3f} ({int(disagree[a==1].sum())} / {(a==1).sum()})")

# Who flips? (R1 grants but R2 denies, or vice versa)
r1_grant_r2_deny = (d_r1 == 1) & (d_r2 == 0)
r1_deny_r2_grant = (d_r1 == 0) & (d_r2 == 1)
print(f" R1 grants, R2 denies: {r1_grant_r2_deny.sum()}")
print(f" R1 denies, R2 grants: {r1_deny_r2_grant.sum()}")
print(f" Female: {disagree_f:.3f} ({dd['n_disagree_female']} / {(a==0).sum()})")
print(f" Male: {disagree_m:.3f} ({dd['n_disagree_male']} / {(a==1).sum()})")
print(f" R1 grants, R2 denies: {dd['r1_grant_r2_deny']}")
print(f" R1 denies, R2 grants: {dd['r1_deny_r2_grant']}")

# PLOT: 1x2 figure (ROC + PR)
plt.rcParams.update({"font.size": 12, "axes.titlesize": 13, "axes.labelsize": 12})
Expand Down
85 changes: 85 additions & 0 deletions tests/test_fairness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Copyright (c) 2026 José M. Álvarez
# SPDX-License-Identifier: Apache-2.0

"""Tests for src.fairness."""

import numpy as np
import pytest

from src.fairness import decision_disagreement, fairness_metrics


def test_fairness_metrics_perfect_separation_by_group():
    """Hand-built scores give exact acceptance rates and DP/TPR/FPR gaps."""
    labels = np.array([1, 1, 0, 0, 1, 0])
    group = np.array([0, 0, 0, 1, 1, 1])
    # At tau = 0.5 the female rows are accepted 2/3 of the time and the male
    # rows 1/3 of the time, so dp_gap = 2/3 - 1/3 = 1/3.
    scores = np.array([0.9, 0.8, 0.2, 0.8, 0.2, 0.1])

    result = fairness_metrics(labels, scores, group, tau=0.5)

    expected = {
        "accept_female": 2 / 3,
        "accept_male": 1 / 3,
        "dp_gap": 1 / 3,
        "tpr_female": 1.0,
        "tpr_male": 0.0,
        "tpr_gap": 1.0,
        "fpr_female": 0.0,
        "fpr_male": 0.5,
        "fpr_gap": -0.5,
    }
    for key, value in expected.items():
        assert result[key] == pytest.approx(value)


def test_fairness_metrics_tau_threshold():
    """A tau below every score accepts all rows; a tau above every score accepts none."""
    labels = np.array([1, 0, 1, 0])
    group = np.array([0, 0, 1, 1])
    scores = np.array([0.6, 0.4, 0.6, 0.4])

    accept_rate = {
        threshold: fairness_metrics(labels, scores, group, tau=threshold)["accept_all"]
        for threshold in (0.3, 0.7)
    }

    assert accept_rate[0.3] == pytest.approx(1.0)
    assert accept_rate[0.7] == pytest.approx(0.0)


def test_fairness_metrics_empty_group_safe():
    """A group with no Y=1 rows gets a NaN TPR while the other group's TPR is exact."""
    labels = np.array([0, 0, 1, 1])
    group = np.array([0, 0, 1, 1])
    scores = np.array([0.9, 0.8, 0.7, 0.6])

    result = fairness_metrics(labels, scores, group, tau=0.5)
    female_tpr = result["tpr_female"]
    male_tpr = result["tpr_male"]

    # Group 0 has only Y=0 rows here, so its TPR has no rows to average over.
    assert np.isnan(female_tpr)
    assert male_tpr == pytest.approx(1.0)


def test_decision_disagreement_symmetric():
    """The overall disagreement rate and count do not depend on receiver order."""
    scores_first = np.array([0.9, 0.2, 0.8, 0.1])
    scores_second = np.array([0.2, 0.9, 0.1, 0.8])
    group = np.array([0, 0, 1, 1])
    threshold = 0.5

    forward = decision_disagreement(scores_first, scores_second, group, threshold)
    backward = decision_disagreement(scores_second, scores_first, group, threshold)

    assert forward["disagree_rate"] == pytest.approx(backward["disagree_rate"])
    assert forward["n_disagree"] == backward["n_disagree"]


def test_decision_disagreement_directional_counts():
    """Mirror-image scores make every row flip, two in each direction."""
    scores_first = np.array([0.9, 0.1, 0.8, 0.2])
    scores_second = np.array([0.1, 0.9, 0.2, 0.8])
    group = np.array([0, 0, 1, 1])
    threshold = 0.5

    result = decision_disagreement(scores_first, scores_second, group, threshold)

    assert result["disagree_rate"] == pytest.approx(1.0)

    expected_counts = {
        "n_disagree": 4,
        "r1_grant_r2_deny": 2,
        "r1_deny_r2_grant": 2,
        "n_disagree_female": 2,
        "n_disagree_male": 2,
    }
    for key, count in expected_counts.items():
        assert result[key] == count
Loading