From ac9cb25eb5037eb439b4e34c6fc8485e124f162b Mon Sep 17 00:00:00 2001 From: Sean Koval Date: Fri, 13 Feb 2026 16:38:58 -0500 Subject: [PATCH] feat(openquant): add AFML ch9 hyperparameter_tuning module --- crates/openquant/src/hyperparameter_tuning.rs | 425 ++++++++++++++++++ crates/openquant/src/lib.rs | 1 + .../openquant/tests/hyperparameter_tuning.rs | 177 ++++++++ docs-site/src/data/afmlDocsState.ts | 14 + docs-site/src/data/moduleDocs.ts | 43 ++ docs-site/src/pages/api-reference.astro | 1 + 6 files changed, 661 insertions(+) create mode 100644 crates/openquant/src/hyperparameter_tuning.rs create mode 100644 crates/openquant/tests/hyperparameter_tuning.rs diff --git a/crates/openquant/src/hyperparameter_tuning.rs b/crates/openquant/src/hyperparameter_tuning.rs new file mode 100644 index 0000000..5f74749 --- /dev/null +++ b/crates/openquant/src/hyperparameter_tuning.rs @@ -0,0 +1,425 @@ +//! Leakage-aware hyperparameter search utilities aligned to AFML Chapter 9. +//! +//! This module provides deterministic grid/randomized search wrappers on top of +//! `PurgedKFold`, with scoring options that preserve sample-weight semantics in +//! both fit and evaluation paths. 
+
+use std::collections::BTreeMap;
+
+use chrono::NaiveDateTime;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use crate::cross_validation::{PurgedKFold, SimpleClassifier};
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum HyperParamValue {
+    Int(i64),
+    Float(f64),
+    Bool(bool),
+}
+
+impl HyperParamValue {
+    pub fn as_i64(&self) -> Option<i64> {
+        match self {
+            Self::Int(v) => Some(*v),
+            _ => None,
+        }
+    }
+
+    pub fn as_f64(&self) -> Option<f64> {
+        match self {
+            Self::Int(v) => Some(*v as f64),
+            Self::Float(v) => Some(*v),
+            _ => None,
+        }
+    }
+
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            Self::Bool(v) => Some(*v),
+            _ => None,
+        }
+    }
+}
+
+pub type ParamSet = BTreeMap<String, HyperParamValue>;
+
+#[derive(Debug, Clone)]
+pub enum RandomParamDistribution {
+    Choice(Vec<HyperParamValue>),
+    Uniform { low: f64, high: f64 },
+    LogUniform { low: f64, high: f64 },
+    IntRangeInclusive { low: i64, high: i64 },
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SearchScoring {
+    Accuracy,
+    BalancedAccuracy,
+    NegLogLoss,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct SearchTrial {
+    pub params: ParamSet,
+    pub fold_scores: Vec<f64>,
+    pub mean_score: f64,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct SearchResult {
+    pub best_params: ParamSet,
+    pub best_score: f64,
+    pub trials: Vec<SearchTrial>,
+}
+
+pub fn sample_log_uniform<R: Rng>(
+    low: f64,
+    high: f64,
+    rng: &mut R,
+) -> Result<f64, String> {
+    if low <= 0.0 || high <= 0.0 {
+        return Err("log-uniform bounds must be strictly positive".to_string());
+    }
+    if low >= high {
+        return Err("log-uniform low must be < high".to_string());
+    }
+    let log_low = low.ln();
+    let log_high = high.ln();
+    let draw = rng.gen_range(log_low..log_high);
+    Ok(draw.exp())
+}
+
+pub fn classification_score(
+    y_true: &[f64],
+    probabilities: &[f64],
+    sample_weight: Option<&[f64]>,
+    scoring: SearchScoring,
+) -> Result<f64, String> {
+    if y_true.is_empty() {
+        return Err("y_true cannot be empty".to_string());
+    }
+    if probabilities.len() != y_true.len() {
+        return
Err("probabilities/y_true length mismatch".to_string()); + } + if let Some(sw) = sample_weight { + if sw.len() != y_true.len() { + return Err("sample_weight length mismatch".to_string()); + } + if sw.iter().any(|w| *w < 0.0) { + return Err("sample_weight cannot contain negative values".to_string()); + } + } + if probabilities.iter().any(|p| !p.is_finite() || *p < 0.0 || *p > 1.0) { + return Err("probabilities must be finite and in [0,1]".to_string()); + } + if y_true.iter().any(|y| (*y - 0.0).abs() > 1e-12 && (*y - 1.0).abs() > 1e-12) { + return Err("y_true must contain only binary labels in {0,1}".to_string()); + } + + let mut sum_w = 0.0; + let mut weighted_correct = 0.0; + let mut weighted_loss = 0.0; + + let mut pos_total = 0.0; + let mut neg_total = 0.0; + let mut pos_correct = 0.0; + let mut neg_correct = 0.0; + + let eps = 1e-15; + for i in 0..y_true.len() { + let w = sample_weight.map(|sw| sw[i]).unwrap_or(1.0); + if w == 0.0 { + continue; + } + let y = y_true[i]; + let p = probabilities[i].max(eps).min(1.0 - eps); + let pred = if probabilities[i] >= 0.5 { 1.0 } else { 0.0 }; + + sum_w += w; + if (pred - y).abs() < 1e-12 { + weighted_correct += w; + } + + weighted_loss += -w * (y * p.ln() + (1.0 - y) * (1.0 - p).ln()); + + if y == 1.0 { + pos_total += w; + if pred == 1.0 { + pos_correct += w; + } + } else { + neg_total += w; + if pred == 0.0 { + neg_correct += w; + } + } + } + + if sum_w <= 0.0 { + return Err("sum of sample_weight must be > 0".to_string()); + } + + let accuracy = weighted_correct / sum_w; + let neg_log_loss = -(weighted_loss / sum_w); + + match scoring { + SearchScoring::Accuracy => Ok(accuracy), + SearchScoring::NegLogLoss => Ok(neg_log_loss), + SearchScoring::BalancedAccuracy => { + // Handle single-class folds by averaging recall over classes present in the fold. 
+            let mut recalls = Vec::new();
+            if pos_total > 0.0 {
+                recalls.push(pos_correct / pos_total);
+            }
+            if neg_total > 0.0 {
+                recalls.push(neg_correct / neg_total);
+            }
+            if recalls.is_empty() {
+                return Err("balanced accuracy requires at least one labeled sample".to_string());
+            }
+            Ok(recalls.iter().sum::<f64>() / recalls.len() as f64)
+        }
+    }
+}
+
+pub fn expand_param_grid(
+    param_grid: &BTreeMap<String, Vec<HyperParamValue>>,
+) -> Result<Vec<ParamSet>, String> {
+    if param_grid.is_empty() {
+        return Err("param_grid cannot be empty".to_string());
+    }
+    for (name, values) in param_grid {
+        if values.is_empty() {
+            return Err(format!("param_grid entry '{name}' cannot be empty"));
+        }
+    }
+
+    let keys: Vec<String> = param_grid.keys().cloned().collect();
+    let mut out = Vec::new();
+    let mut current = ParamSet::new();
+    expand_grid_recursive(&keys, 0, param_grid, &mut current, &mut out);
+    Ok(out)
+}
+
+fn expand_grid_recursive(
+    keys: &[String],
+    idx: usize,
+    grid: &BTreeMap<String, Vec<HyperParamValue>>,
+    current: &mut ParamSet,
+    out: &mut Vec<ParamSet>,
+) {
+    if idx == keys.len() {
+        out.push(current.clone());
+        return;
+    }
+
+    let key = &keys[idx];
+    if let Some(values) = grid.get(key) {
+        for value in values {
+            current.insert(key.clone(), value.clone());
+            expand_grid_recursive(keys, idx + 1, grid, current, out);
+        }
+    }
+}
+
+pub struct SearchData<'a> {
+    pub x: &'a [Vec<f64>],
+    pub y: &'a [f64],
+    pub sample_weight: Option<&'a [f64]>,
+    pub samples_info_sets: &'a [(NaiveDateTime, NaiveDateTime)],
+}
+
+pub fn grid_search<C, F>(
+    build_classifier: F,
+    param_grid: &BTreeMap<String, Vec<HyperParamValue>>,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring: SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    let params = expand_param_grid(param_grid)?;
+    search_over_params(build_classifier, params, data, n_splits, pct_embargo, scoring)
+}
+
+pub fn randomized_search<C, F>(
+    build_classifier: F,
+    param_space: &BTreeMap<String, RandomParamDistribution>,
+    n_iter: usize,
+    seed: u64,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring:
SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    if param_space.is_empty() {
+        return Err("param_space cannot be empty".to_string());
+    }
+    if n_iter == 0 {
+        return Err("n_iter must be > 0".to_string());
+    }
+
+    let mut rng = StdRng::seed_from_u64(seed);
+    let keys: Vec<String> = param_space.keys().cloned().collect();
+    let mut params = Vec::with_capacity(n_iter);
+    for _ in 0..n_iter {
+        let mut draw = ParamSet::new();
+        for key in &keys {
+            let dist = param_space
+                .get(key)
+                .ok_or_else(|| format!("missing distribution for key '{key}'"))?;
+            let value = sample_distribution(dist, &mut rng)?;
+            draw.insert(key.clone(), value);
+        }
+        params.push(draw);
+    }
+
+    search_over_params(build_classifier, params, data, n_splits, pct_embargo, scoring)
+}
+
+fn sample_distribution<R: Rng>(
+    dist: &RandomParamDistribution,
+    rng: &mut R,
+) -> Result<HyperParamValue, String> {
+    match dist {
+        RandomParamDistribution::Choice(values) => {
+            if values.is_empty() {
+                return Err("choice distribution cannot be empty".to_string());
+            }
+            let idx = rng.gen_range(0..values.len());
+            Ok(values[idx].clone())
+        }
+        RandomParamDistribution::Uniform { low, high } => {
+            if !low.is_finite() || !high.is_finite() || low >= high {
+                return Err("uniform bounds must be finite and satisfy low < high".to_string());
+            }
+            Ok(HyperParamValue::Float(rng.gen_range(*low..*high)))
+        }
+        RandomParamDistribution::LogUniform { low, high } => {
+            let v = sample_log_uniform(*low, *high, rng)?;
+            Ok(HyperParamValue::Float(v))
+        }
+        RandomParamDistribution::IntRangeInclusive { low, high } => {
+            if low > high {
+                return Err("IntRangeInclusive requires low <= high".to_string());
+            }
+            Ok(HyperParamValue::Int(rng.gen_range(*low..=*high)))
+        }
+    }
+}
+
+fn search_over_params<C, F>(
+    build_classifier: F,
+    param_sets: Vec<ParamSet>,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring: SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    validate_search_data(&data,
n_splits)?;
+    let cv = PurgedKFold::new(n_splits, data.samples_info_sets.to_vec(), pct_embargo)?;
+    let splits = cv.split(data.x.len())?;
+
+    let mut trials = Vec::with_capacity(param_sets.len());
+    for params in param_sets {
+        let fold_scores = evaluate_params(
+            &build_classifier,
+            &params,
+            &splits,
+            data.x,
+            data.y,
+            data.sample_weight,
+            scoring,
+        )?;
+        let mean_score = fold_scores.iter().sum::<f64>() / fold_scores.len() as f64;
+        trials.push(SearchTrial { params, fold_scores, mean_score });
+    }
+
+    let best = trials
+        .iter()
+        .max_by(|a, b| a.mean_score.partial_cmp(&b.mean_score).unwrap_or(std::cmp::Ordering::Equal))
+        .cloned()
+        .ok_or_else(|| "no trials produced".to_string())?;
+
+    Ok(SearchResult { best_params: best.params, best_score: best.mean_score, trials })
+}
+
+fn validate_search_data(data: &SearchData<'_>, n_splits: usize) -> Result<(), String> {
+    if data.x.is_empty() {
+        return Err("x cannot be empty".to_string());
+    }
+    if data.y.is_empty() {
+        return Err("y cannot be empty".to_string());
+    }
+    if data.x.len() != data.y.len() {
+        return Err("x/y length mismatch".to_string());
+    }
+    if data.samples_info_sets.len() != data.x.len() {
+        return Err("samples_info_sets length must match x length".to_string());
+    }
+    if n_splits < 2 {
+        return Err("n_splits must be >= 2".to_string());
+    }
+    if let Some(sw) = data.sample_weight {
+        if sw.len() != data.y.len() {
+            return Err("sample_weight length mismatch".to_string());
+        }
+        if sw.iter().any(|w| *w < 0.0) {
+            return Err("sample_weight cannot contain negative values".to_string());
+        }
+    }
+    Ok(())
+}
+
+fn evaluate_params<C, F>(
+    build_classifier: &F,
+    params: &ParamSet,
+    splits: &[(Vec<usize>, Vec<usize>)],
+    x: &[Vec<f64>],
+    y: &[f64],
+    sample_weight: Option<&[f64]>,
+    scoring: SearchScoring,
+) -> Result<Vec<f64>, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    let mut fold_scores = Vec::with_capacity(splits.len());
+
+    for (train_idx, test_idx) in splits {
+        if train_idx.is_empty() || test_idx.is_empty() {
return Err("PurgedKFold generated an empty train/test fold".to_string());
+        }
+
+        let x_train: Vec<Vec<f64>> = train_idx.iter().map(|i| x[*i].clone()).collect();
+        let y_train: Vec<f64> = train_idx.iter().map(|i| y[*i]).collect();
+        let x_test: Vec<Vec<f64>> = test_idx.iter().map(|i| x[*i].clone()).collect();
+        let y_test: Vec<f64> = test_idx.iter().map(|i| y[*i]).collect();
+
+        let sw_train: Option<Vec<f64>> =
+            sample_weight.map(|sw| train_idx.iter().map(|i| sw[*i]).collect());
+        let sw_test: Option<Vec<f64>> =
+            sample_weight.map(|sw| test_idx.iter().map(|i| sw[*i]).collect());
+
+        let mut clf = build_classifier(params);
+        clf.fit(&x_train, &y_train, sw_train.as_deref());
+        let probs = clf.predict_proba(&x_test);
+        let fold_score = classification_score(&y_test, &probs, sw_test.as_deref(), scoring)?;
+        fold_scores.push(fold_score);
+    }
+
+    Ok(fold_scores)
+}
diff --git a/crates/openquant/src/lib.rs b/crates/openquant/src/lib.rs
index 8964c04..568ac8b 100644
--- a/crates/openquant/src/lib.rs
+++ b/crates/openquant/src/lib.rs
@@ -13,6 +13,7 @@ pub mod fingerprint;
 pub mod fracdiff;
 pub mod hcaa;
 pub mod hrp;
+pub mod hyperparameter_tuning;
 pub mod labeling;
 pub mod microstructural_features;
 pub mod onc;
diff --git a/crates/openquant/tests/hyperparameter_tuning.rs b/crates/openquant/tests/hyperparameter_tuning.rs
new file mode 100644
index 0000000..65e7aaa
--- /dev/null
+++ b/crates/openquant/tests/hyperparameter_tuning.rs
@@ -0,0 +1,177 @@
+use std::collections::BTreeMap;
+
+use chrono::NaiveDateTime;
+use openquant::cross_validation::SimpleClassifier;
+use openquant::hyperparameter_tuning::{
+    classification_score, grid_search, randomized_search, sample_log_uniform, HyperParamValue,
+    ParamSet, RandomParamDistribution, SearchData, SearchScoring,
+};
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+
+fn make_series(
+    start: &str,
+    periods: usize,
+    freq_minutes: i64,
+) -> Vec<(NaiveDateTime, NaiveDateTime)> {
+    let start_dt = NaiveDateTime::parse_from_str(start, "%Y-%m-%d %H:%M:%S").unwrap();
(0..periods)
+        .map(|i| {
+            let idx = start_dt + chrono::Duration::minutes(i as i64 * freq_minutes);
+            let val = idx + chrono::Duration::minutes(3);
+            (idx, val)
+        })
+        .collect()
+}
+
+struct ThresholdClassifier {
+    threshold: f64,
+    sharpness: f64,
+    trained_prior: f64,
+}
+
+impl ThresholdClassifier {
+    fn from_params(params: &ParamSet) -> Self {
+        let threshold = params.get("threshold").and_then(HyperParamValue::as_f64).unwrap_or(0.5);
+        let sharpness = params.get("sharpness").and_then(HyperParamValue::as_f64).unwrap_or(6.0);
+
+        Self { threshold, sharpness, trained_prior: 0.5 }
+    }
+}
+
+impl SimpleClassifier for ThresholdClassifier {
+    fn fit(&mut self, _x: &[Vec<f64>], y: &[f64], sample_weight: Option<&[f64]>) {
+        let mut total_w = 0.0;
+        let mut pos_w = 0.0;
+        for (i, yi) in y.iter().enumerate() {
+            let w = sample_weight.map(|sw| sw[i]).unwrap_or(1.0);
+            total_w += w;
+            if *yi == 1.0 {
+                pos_w += w;
+            }
+        }
+        self.trained_prior = if total_w > 0.0 { pos_w / total_w } else { 0.5 };
+    }
+
+    fn predict_proba(&self, x: &[Vec<f64>]) -> Vec<f64> {
+        x.iter()
+            .map(|row| {
+                let z = (row[0] - self.threshold) * self.sharpness;
+                let logistic = 1.0 / (1.0 + (-z).exp());
+                // Blend threshold behavior with class prior learned from weighted fit.
+                (0.85 * logistic + 0.15 * self.trained_prior).clamp(0.0, 1.0)
+            })
+            .collect()
+    }
+}
+
+#[test]
+fn test_grid_search_with_purged_kfold_and_embargo() {
+    let n = 120usize;
+    let x: Vec<Vec<f64>> = (0..n).map(|i| vec![i as f64 / (n as f64 - 1.0)]).collect();
+    let y: Vec<f64> = x.iter().map(|v| if v[0] >= 0.7 { 1.0 } else { 0.0 }).collect();
+    let sample_weight: Vec<f64> = y.iter().map(|yi| if *yi == 1.0 { 4.0 } else { 1.0 }).collect();
+    let info_sets = make_series("2019-01-01 00:00:00", n, 1);
+
+    let mut param_grid = BTreeMap::new();
+    param_grid.insert(
+        "threshold".to_string(),
+        vec![HyperParamValue::Float(0.5), HyperParamValue::Float(0.7), HyperParamValue::Float(0.9)],
+    );
+    param_grid.insert(
+        "sharpness".to_string(),
+        vec![HyperParamValue::Float(4.0), HyperParamValue::Float(8.0)],
+    );
+
+    let result = grid_search(
+        ThresholdClassifier::from_params,
+        &param_grid,
+        SearchData {
+            x: &x,
+            y: &y,
+            sample_weight: Some(&sample_weight),
+            samples_info_sets: &info_sets,
+        },
+        4,
+        0.02,
+        SearchScoring::NegLogLoss,
+    )
+    .unwrap();
+
+    assert_eq!(result.trials.len(), 6);
+    assert!(result.best_score.is_finite());
+
+    let best_threshold =
+        result.best_params.get("threshold").and_then(HyperParamValue::as_f64).unwrap();
+    assert!((best_threshold - 0.7).abs() < 1e-9);
+}
+
+#[test]
+fn test_randomized_search_seeded_deterministic_and_log_uniform() {
+    let n = 90usize;
+    let x: Vec<Vec<f64>> = (0..n).map(|i| vec![i as f64 / (n as f64 - 1.0)]).collect();
+    let y: Vec<f64> = x.iter().map(|v| if v[0] >= 0.65 { 1.0 } else { 0.0 }).collect();
+    let info_sets = make_series("2019-01-01 00:00:00", n, 1);
+
+    let mut param_space = BTreeMap::new();
+    param_space.insert(
+        "threshold".to_string(),
+        RandomParamDistribution::Uniform { low: 0.45, high: 0.85 },
+    );
+    param_space.insert(
+        "sharpness".to_string(),
+        RandomParamDistribution::LogUniform { low: 1e-1, high: 2e1 },
+    );
+
+    let run = || {
+        randomized_search(
+            ThresholdClassifier::from_params,
+            &param_space,
+            12,
+            42,
+            SearchData { x: &x, y:
&y, sample_weight: None, samples_info_sets: &info_sets },
+            3,
+            0.01,
+            SearchScoring::BalancedAccuracy,
+        )
+        .unwrap()
+    };
+
+    let first = run();
+    let second = run();
+    assert_eq!(first.best_params, second.best_params);
+    assert!((first.best_score - second.best_score).abs() < 1e-12);
+    assert_eq!(first.trials.len(), 12);
+    assert_eq!(first.trials, second.trials);
+
+    let mut rng = StdRng::seed_from_u64(7);
+    let s1 = sample_log_uniform(1e-3, 1e1, &mut rng).unwrap();
+    let s2 = sample_log_uniform(1e-3, 1e1, &mut rng).unwrap();
+    assert!(s1 >= 1e-3 && s1 <= 1e1);
+    assert!(s2 >= 1e-3 && s2 <= 1e1);
+    assert!((s1 - s2).abs() > 1e-12);
+}
+
+#[test]
+fn test_scoring_layer_handles_imbalance_weighted_neg_log_loss_and_metrics() {
+    // Strongly imbalanced labels: 95% class 0.
+    let mut y = vec![0.0; 95];
+    y.extend(vec![1.0; 5]);
+
+    // Majority-like predictions: high confidence toward class 0 for all points.
+    let probs = vec![0.1; 100];
+
+    let accuracy = classification_score(&y, &probs, None, SearchScoring::Accuracy).unwrap();
+    let balanced = classification_score(&y, &probs, None, SearchScoring::BalancedAccuracy).unwrap();
+    let unweighted_nll = classification_score(&y, &probs, None, SearchScoring::NegLogLoss).unwrap();
+
+    // Upweight minority class mistakes.
+    let weights: Vec<f64> = y.iter().map(|yi| if *yi == 1.0 { 20.0 } else { 1.0 }).collect();
+    let weighted_nll =
+        classification_score(&y, &probs, Some(&weights), SearchScoring::NegLogLoss).unwrap();
+
+    assert!(accuracy > 0.9);
+    assert!(balanced < accuracy);
+    // Weighting minority class should penalize this classifier's cross-entropy.
+ assert!(weighted_nll < unweighted_nll); +} diff --git a/docs-site/src/data/afmlDocsState.ts b/docs-site/src/data/afmlDocsState.ts index ba01d1a..1e39a90 100644 --- a/docs-site/src/data/afmlDocsState.ts +++ b/docs-site/src/data/afmlDocsState.ts @@ -142,6 +142,20 @@ export const afmlDocsState = { } ] }, + { + "chapter": "CHAPTER 9", + "theme": "Hyper-parameter tuning", + "status": "done", + "chunkCount": 17, + "sections": [ + { + "id": "chapter-9-hyperparameter_tuning", + "module": "hyperparameter_tuning", + "slug": "hyperparameter-tuning", + "status": "done" + } + ] + }, { "chapter": "CHAPTER 10", "theme": "Position sizing", diff --git a/docs-site/src/data/moduleDocs.ts b/docs-site/src/data/moduleDocs.ts index bf75fac..b6dc7d9 100644 --- a/docs-site/src/data/moduleDocs.ts +++ b/docs-site/src/data/moduleDocs.ts @@ -181,6 +181,49 @@ export const moduleDocs: ModuleDoc[] = [ ], notes: ["Threshold selection controls bar frequency and noise level.", "Keep OHLCV semantics consistent across downstream features."], }, + { + slug: "hyperparameter-tuning", + module: "hyperparameter_tuning", + subject: "Sampling, Validation and ML Diagnostics", + summary: "Leakage-aware grid/randomized hyper-parameter search with purged CV and weighted scoring.", + whyItExists: + "AFML Chapter 9 recommends tuning under PurgedKFold, using randomized search for large spaces, and scoring with metrics aligned to trading objectives.", + keyApis: [ + "grid_search", + "randomized_search", + "expand_param_grid", + "sample_log_uniform", + "classification_score", + "SearchScoring", + "RandomParamDistribution", + ], + formulas: [ + { + label: "Purged CV Objective", + latex: "\\hat\\theta=\\arg\\max_{\\theta\\in\\Theta}\\frac{1}{K}\\sum_{k=1}^{K}\\mathrm{Score}(f_\\theta,\\mathcal T_k^{train},\\mathcal T_k^{test})", + }, + { + label: "Log-Uniform Draw", + latex: "\\log x\\sim U(\\log a,\\log b),\\; a>0,\\;x\\in(a,b)", + }, + { + label: "Weighted Neg Log Loss", + latex: "-\\frac{1}{\\sum_i w_i}\\sum_i 
w_i\\left[y_i\\log p_i + (1-y_i)\\log(1-p_i)\\right]", + }, + ], + examples: [ + { + title: "Randomized search with PurgedKFold semantics", + language: "rust", + code: `use std::collections::BTreeMap;\nuse openquant::hyperparameter_tuning::{\n randomized_search, RandomParamDistribution, SearchData, SearchScoring,\n};\n\nlet mut space = BTreeMap::new();\nspace.insert(\"C\".to_string(), RandomParamDistribution::LogUniform { low: 1e-2, high: 1e2 });\nspace.insert(\"gamma\".to_string(), RandomParamDistribution::LogUniform { low: 1e-3, high: 1e1 });\n\nlet result = randomized_search(\n build_model,\n &space,\n 25,\n 42,\n SearchData { x: &x, y: &y, sample_weight: Some(&w), samples_info_sets: &info_sets },\n 5,\n 0.01,\n SearchScoring::NegLogLoss,\n)?;\nprintln!(\"best score = {}\", result.best_score);`, + }, + ], + notes: [ + "Use Accuracy only when each prediction has similar economic value (equal bet sizing).", + "Prefer weighted NegLogLoss when probabilities drive position sizing or outcomes have different economic magnitude.", + "BalancedAccuracy is useful for severe class imbalance, especially in meta-labeling where recall of positives matters.", + ], + }, { slug: "ef3m", module: "ef3m", diff --git a/docs-site/src/pages/api-reference.astro b/docs-site/src/pages/api-reference.astro index d5451db..bbba6db 100644 --- a/docs-site/src/pages/api-reference.astro +++ b/docs-site/src/pages/api-reference.astro @@ -19,6 +19,7 @@ import Layout from "../layouts/Layout.astro";
  • sampling::seq_bootstrap, sampling::get_ind_matrix, sampling::get_ind_mat_average_uniqueness
  • cross_validation::ml_cross_val_score, cross_validation::ml_get_train_times, cross_validation::PurgedKFold
+  • hyperparameter_tuning::grid_search, hyperparameter_tuning::randomized_search, hyperparameter_tuning::sample_log_uniform, hyperparameter_tuning::classification_score
  • feature_importance::mean_decrease_impurity, feature_importance::mean_decrease_accuracy, feature_importance::single_feature_importance
  • fingerprint::RegressionModelFingerprint, fingerprint::ClassificationModelFingerprint
  • sb_bagging::SequentiallyBootstrappedBaggingClassifier, sb_bagging::SequentiallyBootstrappedBaggingRegressor