From ac9cb25eb5037eb439b4e34c6fc8485e124f162b Mon Sep 17 00:00:00 2001 From: Sean Koval Date: Fri, 13 Feb 2026 16:38:58 -0500 Subject: [PATCH] feat(openquant): add AFML ch9 hyperparameter_tuning module --- crates/openquant/src/hyperparameter_tuning.rs | 425 ++++++++++++++++++ crates/openquant/src/lib.rs | 1 + .../openquant/tests/hyperparameter_tuning.rs | 177 ++++++++ docs-site/src/data/afmlDocsState.ts | 14 + docs-site/src/data/moduleDocs.ts | 43 ++ docs-site/src/pages/api-reference.astro | 1 + 6 files changed, 661 insertions(+) create mode 100644 crates/openquant/src/hyperparameter_tuning.rs create mode 100644 crates/openquant/tests/hyperparameter_tuning.rs diff --git a/crates/openquant/src/hyperparameter_tuning.rs b/crates/openquant/src/hyperparameter_tuning.rs new file mode 100644 index 0000000..5f74749 --- /dev/null +++ b/crates/openquant/src/hyperparameter_tuning.rs @@ -0,0 +1,425 @@ +//! Leakage-aware hyperparameter search utilities aligned to AFML Chapter 9. +//! +//! This module provides deterministic grid/randomized search wrappers on top of +//! `PurgedKFold`, with scoring options that preserve sample-weight semantics in +//! both fit and evaluation paths. 
+
+use std::collections::BTreeMap;
+
+use chrono::NaiveDateTime;
+use rand::rngs::StdRng;
+use rand::{Rng, SeedableRng};
+
+use crate::cross_validation::{PurgedKFold, SimpleClassifier};
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum HyperParamValue {
+    Int(i64),
+    Float(f64),
+    Bool(bool),
+}
+
+impl HyperParamValue {
+    pub fn as_i64(&self) -> Option<i64> {
+        match self {
+            Self::Int(v) => Some(*v),
+            _ => None,
+        }
+    }
+
+    pub fn as_f64(&self) -> Option<f64> {
+        match self {
+            Self::Int(v) => Some(*v as f64),
+            Self::Float(v) => Some(*v),
+            _ => None,
+        }
+    }
+
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            Self::Bool(v) => Some(*v),
+            _ => None,
+        }
+    }
+}
+
+pub type ParamSet = BTreeMap<String, HyperParamValue>;
+
+#[derive(Debug, Clone)]
+pub enum RandomParamDistribution {
+    Choice(Vec<HyperParamValue>),
+    Uniform { low: f64, high: f64 },
+    LogUniform { low: f64, high: f64 },
+    IntRangeInclusive { low: i64, high: i64 },
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SearchScoring {
+    Accuracy,
+    BalancedAccuracy,
+    NegLogLoss,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct SearchTrial {
+    pub params: ParamSet,
+    pub fold_scores: Vec<f64>,
+    pub mean_score: f64,
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct SearchResult {
+    pub best_params: ParamSet,
+    pub best_score: f64,
+    pub trials: Vec<SearchTrial>,
+}
+
+pub fn sample_log_uniform<R: Rng>(
+    low: f64,
+    high: f64,
+    rng: &mut R,
+) -> Result<f64, String> {
+    if low <= 0.0 || high <= 0.0 {
+        return Err("log-uniform bounds must be strictly positive".to_string());
+    }
+    if low >= high {
+        return Err("log-uniform low must be < high".to_string());
+    }
+    let log_low = low.ln();
+    let log_high = high.ln();
+    let draw = rng.gen_range(log_low..log_high);
+    Ok(draw.exp())
+}
+
+pub fn classification_score(
+    y_true: &[f64],
+    probabilities: &[f64],
+    sample_weight: Option<&[f64]>,
+    scoring: SearchScoring,
+) -> Result<f64, String> {
+    if y_true.is_empty() {
+        return Err("y_true cannot be empty".to_string());
+    }
+    if probabilities.len() != y_true.len() {
+        return
Err("probabilities/y_true length mismatch".to_string()); + } + if let Some(sw) = sample_weight { + if sw.len() != y_true.len() { + return Err("sample_weight length mismatch".to_string()); + } + if sw.iter().any(|w| *w < 0.0) { + return Err("sample_weight cannot contain negative values".to_string()); + } + } + if probabilities.iter().any(|p| !p.is_finite() || *p < 0.0 || *p > 1.0) { + return Err("probabilities must be finite and in [0,1]".to_string()); + } + if y_true.iter().any(|y| (*y - 0.0).abs() > 1e-12 && (*y - 1.0).abs() > 1e-12) { + return Err("y_true must contain only binary labels in {0,1}".to_string()); + } + + let mut sum_w = 0.0; + let mut weighted_correct = 0.0; + let mut weighted_loss = 0.0; + + let mut pos_total = 0.0; + let mut neg_total = 0.0; + let mut pos_correct = 0.0; + let mut neg_correct = 0.0; + + let eps = 1e-15; + for i in 0..y_true.len() { + let w = sample_weight.map(|sw| sw[i]).unwrap_or(1.0); + if w == 0.0 { + continue; + } + let y = y_true[i]; + let p = probabilities[i].max(eps).min(1.0 - eps); + let pred = if probabilities[i] >= 0.5 { 1.0 } else { 0.0 }; + + sum_w += w; + if (pred - y).abs() < 1e-12 { + weighted_correct += w; + } + + weighted_loss += -w * (y * p.ln() + (1.0 - y) * (1.0 - p).ln()); + + if y == 1.0 { + pos_total += w; + if pred == 1.0 { + pos_correct += w; + } + } else { + neg_total += w; + if pred == 0.0 { + neg_correct += w; + } + } + } + + if sum_w <= 0.0 { + return Err("sum of sample_weight must be > 0".to_string()); + } + + let accuracy = weighted_correct / sum_w; + let neg_log_loss = -(weighted_loss / sum_w); + + match scoring { + SearchScoring::Accuracy => Ok(accuracy), + SearchScoring::NegLogLoss => Ok(neg_log_loss), + SearchScoring::BalancedAccuracy => { + // Handle single-class folds by averaging recall over classes present in the fold. 
+            let mut recalls = Vec::new();
+            if pos_total > 0.0 {
+                recalls.push(pos_correct / pos_total);
+            }
+            if neg_total > 0.0 {
+                recalls.push(neg_correct / neg_total);
+            }
+            if recalls.is_empty() {
+                return Err("balanced accuracy requires at least one labeled sample".to_string());
+            }
+            Ok(recalls.iter().sum::<f64>() / recalls.len() as f64)
+        }
+    }
+}
+
+pub fn expand_param_grid(
+    param_grid: &BTreeMap<String, Vec<HyperParamValue>>,
+) -> Result<Vec<ParamSet>, String> {
+    if param_grid.is_empty() {
+        return Err("param_grid cannot be empty".to_string());
+    }
+    for (name, values) in param_grid {
+        if values.is_empty() {
+            return Err(format!("param_grid entry '{name}' cannot be empty"));
+        }
+    }
+
+    let keys: Vec<String> = param_grid.keys().cloned().collect();
+    let mut out = Vec::new();
+    let mut current = ParamSet::new();
+    expand_grid_recursive(&keys, 0, param_grid, &mut current, &mut out);
+    Ok(out)
+}
+
+fn expand_grid_recursive(
+    keys: &[String],
+    idx: usize,
+    grid: &BTreeMap<String, Vec<HyperParamValue>>,
+    current: &mut ParamSet,
+    out: &mut Vec<ParamSet>,
+) {
+    if idx == keys.len() {
+        out.push(current.clone());
+        return;
+    }
+
+    let key = &keys[idx];
+    if let Some(values) = grid.get(key) {
+        for value in values {
+            current.insert(key.clone(), value.clone());
+            expand_grid_recursive(keys, idx + 1, grid, current, out);
+        }
+    }
+}
+
+pub struct SearchData<'a> {
+    pub x: &'a [Vec<f64>],
+    pub y: &'a [f64],
+    pub sample_weight: Option<&'a [f64]>,
+    pub samples_info_sets: &'a [(NaiveDateTime, NaiveDateTime)],
+}
+
+pub fn grid_search<C, F>(
+    build_classifier: F,
+    param_grid: &BTreeMap<String, Vec<HyperParamValue>>,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring: SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    let params = expand_param_grid(param_grid)?;
+    search_over_params(build_classifier, params, data, n_splits, pct_embargo, scoring)
+}
+
+pub fn randomized_search<C, F>(
+    build_classifier: F,
+    param_space: &BTreeMap<String, RandomParamDistribution>,
+    n_iter: usize,
+    seed: u64,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring:
SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    if param_space.is_empty() {
+        return Err("param_space cannot be empty".to_string());
+    }
+    if n_iter == 0 {
+        return Err("n_iter must be > 0".to_string());
+    }
+
+    let mut rng = StdRng::seed_from_u64(seed);
+    let keys: Vec<String> = param_space.keys().cloned().collect();
+    let mut params = Vec::with_capacity(n_iter);
+    for _ in 0..n_iter {
+        let mut draw = ParamSet::new();
+        for key in &keys {
+            let dist = param_space
+                .get(key)
+                .ok_or_else(|| format!("missing distribution for key '{key}'"))?;
+            let value = sample_distribution(dist, &mut rng)?;
+            draw.insert(key.clone(), value);
+        }
+        params.push(draw);
+    }
+
+    search_over_params(build_classifier, params, data, n_splits, pct_embargo, scoring)
+}
+
+fn sample_distribution<R: Rng>(
+    dist: &RandomParamDistribution,
+    rng: &mut R,
+) -> Result<HyperParamValue, String> {
+    match dist {
+        RandomParamDistribution::Choice(values) => {
+            if values.is_empty() {
+                return Err("choice distribution cannot be empty".to_string());
+            }
+            let idx = rng.gen_range(0..values.len());
+            Ok(values[idx].clone())
+        }
+        RandomParamDistribution::Uniform { low, high } => {
+            if !low.is_finite() || !high.is_finite() || low >= high {
+                return Err("uniform bounds must be finite and satisfy low < high".to_string());
+            }
+            Ok(HyperParamValue::Float(rng.gen_range(*low..*high)))
+        }
+        RandomParamDistribution::LogUniform { low, high } => {
+            let v = sample_log_uniform(*low, *high, rng)?;
+            Ok(HyperParamValue::Float(v))
+        }
+        RandomParamDistribution::IntRangeInclusive { low, high } => {
+            if low > high {
+                return Err("IntRangeInclusive requires low <= high".to_string());
+            }
+            Ok(HyperParamValue::Int(rng.gen_range(*low..=*high)))
+        }
+    }
+}
+
+fn search_over_params<C, F>(
+    build_classifier: F,
+    param_sets: Vec<ParamSet>,
+    data: SearchData<'_>,
+    n_splits: usize,
+    pct_embargo: f64,
+    scoring: SearchScoring,
+) -> Result<SearchResult, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    validate_search_data(&data,
n_splits)?;
+    let cv = PurgedKFold::new(n_splits, data.samples_info_sets.to_vec(), pct_embargo)?;
+    let splits = cv.split(data.x.len())?;
+
+    let mut trials = Vec::with_capacity(param_sets.len());
+    for params in param_sets {
+        let fold_scores = evaluate_params(
+            &build_classifier,
+            &params,
+            &splits,
+            data.x,
+            data.y,
+            data.sample_weight,
+            scoring,
+        )?;
+        let mean_score = fold_scores.iter().sum::<f64>() / fold_scores.len() as f64;
+        trials.push(SearchTrial { params, fold_scores, mean_score });
+    }
+
+    let best = trials
+        .iter()
+        .max_by(|a, b| a.mean_score.partial_cmp(&b.mean_score).unwrap_or(std::cmp::Ordering::Equal))
+        .cloned()
+        .ok_or_else(|| "no trials produced".to_string())?;
+
+    Ok(SearchResult { best_params: best.params, best_score: best.mean_score, trials })
+}
+
+fn validate_search_data(data: &SearchData<'_>, n_splits: usize) -> Result<(), String> {
+    if data.x.is_empty() {
+        return Err("x cannot be empty".to_string());
+    }
+    if data.y.is_empty() {
+        return Err("y cannot be empty".to_string());
+    }
+    if data.x.len() != data.y.len() {
+        return Err("x/y length mismatch".to_string());
+    }
+    if data.samples_info_sets.len() != data.x.len() {
+        return Err("samples_info_sets length must match x length".to_string());
+    }
+    if n_splits < 2 {
+        return Err("n_splits must be >= 2".to_string());
+    }
+    if let Some(sw) = data.sample_weight {
+        if sw.len() != data.y.len() {
+            return Err("sample_weight length mismatch".to_string());
+        }
+        if sw.iter().any(|w| *w < 0.0) {
+            return Err("sample_weight cannot contain negative values".to_string());
+        }
+    }
+    Ok(())
+}
+
+fn evaluate_params<C, F>(
+    build_classifier: &F,
+    params: &ParamSet,
+    splits: &[(Vec<usize>, Vec<usize>)],
+    x: &[Vec<f64>],
+    y: &[f64],
+    sample_weight: Option<&[f64]>,
+    scoring: SearchScoring,
+) -> Result<Vec<f64>, String>
+where
+    C: SimpleClassifier,
+    F: Fn(&ParamSet) -> C,
+{
+    let mut fold_scores = Vec::with_capacity(splits.len());
+
+    for (train_idx, test_idx) in splits {
+        if train_idx.is_empty() || test_idx.is_empty() {
return Err("PurgedKFold generated an empty train/test fold".to_string());
+        }
+
+        let x_train: Vec<Vec<f64>> = train_idx.iter().map(|i| x[*i].clone()).collect();
+        let y_train: Vec<f64> = train_idx.iter().map(|i| y[*i]).collect();
+        let x_test: Vec<Vec<f64>> = test_idx.iter().map(|i| x[*i].clone()).collect();
+        let y_test: Vec<f64> = test_idx.iter().map(|i| y[*i]).collect();
+
+        let sw_train: Option<Vec<f64>> =
+            sample_weight.map(|sw| train_idx.iter().map(|i| sw[*i]).collect());
+        let sw_test: Option<Vec<f64>> =
+            sample_weight.map(|sw| test_idx.iter().map(|i| sw[*i]).collect());
+
+        let mut clf = build_classifier(params);
+        clf.fit(&x_train, &y_train, sw_train.as_deref());
+        let probs = clf.predict_proba(&x_test);
+        let fold_score = classification_score(&y_test, &probs, sw_test.as_deref(), scoring)?;
+        fold_scores.push(fold_score);
+    }
+
+    Ok(fold_scores)
+}
diff --git a/crates/openquant/src/lib.rs b/crates/openquant/src/lib.rs
index 8964c04..568ac8b 100644
--- a/crates/openquant/src/lib.rs
+++ b/crates/openquant/src/lib.rs
@@ -13,6 +13,7 @@ pub mod fingerprint;
 pub mod fracdiff;
 pub mod hcaa;
 pub mod hrp;
+pub mod hyperparameter_tuning;
 pub mod labeling;
 pub mod microstructural_features;
 pub mod onc;
diff --git a/crates/openquant/tests/hyperparameter_tuning.rs b/crates/openquant/tests/hyperparameter_tuning.rs
new file mode 100644
index 0000000..65e7aaa
--- /dev/null
+++ b/crates/openquant/tests/hyperparameter_tuning.rs
@@ -0,0 +1,177 @@
+use std::collections::BTreeMap;
+
+use chrono::NaiveDateTime;
+use openquant::cross_validation::SimpleClassifier;
+use openquant::hyperparameter_tuning::{
+    classification_score, grid_search, randomized_search, sample_log_uniform, HyperParamValue,
+    ParamSet, RandomParamDistribution, SearchData, SearchScoring,
+};
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+
+fn make_series(
+    start: &str,
+    periods: usize,
+    freq_minutes: i64,
+) -> Vec<(NaiveDateTime, NaiveDateTime)> {
+    let start_dt = NaiveDateTime::parse_from_str(start, "%Y-%m-%d %H:%M:%S").unwrap();
(0..periods)
+        .map(|i| {
+            let idx = start_dt + chrono::Duration::minutes(i as i64 * freq_minutes);
+            let val = idx + chrono::Duration::minutes(3);
+            (idx, val)
+        })
+        .collect()
+}
+
+struct ThresholdClassifier {
+    threshold: f64,
+    sharpness: f64,
+    trained_prior: f64,
+}
+
+impl ThresholdClassifier {
+    fn from_params(params: &ParamSet) -> Self {
+        let threshold = params.get("threshold").and_then(HyperParamValue::as_f64).unwrap_or(0.5);
+        let sharpness = params.get("sharpness").and_then(HyperParamValue::as_f64).unwrap_or(6.0);
+
+        Self { threshold, sharpness, trained_prior: 0.5 }
+    }
+}
+
+impl SimpleClassifier for ThresholdClassifier {
+    fn fit(&mut self, _x: &[Vec<f64>], y: &[f64], sample_weight: Option<&[f64]>) {
+        let mut total_w = 0.0;
+        let mut pos_w = 0.0;
+        for (i, yi) in y.iter().enumerate() {
+            let w = sample_weight.map(|sw| sw[i]).unwrap_or(1.0);
+            total_w += w;
+            if *yi == 1.0 {
+                pos_w += w;
+            }
+        }
+        self.trained_prior = if total_w > 0.0 { pos_w / total_w } else { 0.5 };
+    }
+
+    fn predict_proba(&self, x: &[Vec<f64>]) -> Vec<f64> {
+        x.iter()
+            .map(|row| {
+                let z = (row[0] - self.threshold) * self.sharpness;
+                let logistic = 1.0 / (1.0 + (-z).exp());
+                // Blend threshold behavior with class prior learned from weighted fit.
+                (0.85 * logistic + 0.15 * self.trained_prior).clamp(0.0, 1.0)
+            })
+            .collect()
+    }
+}
+
+#[test]
+fn test_grid_search_with_purged_kfold_and_embargo() {
+    let n = 120usize;
+    let x: Vec<Vec<f64>> = (0..n).map(|i| vec![i as f64 / (n as f64 - 1.0)]).collect();
+    let y: Vec<f64> = x.iter().map(|v| if v[0] >= 0.7 { 1.0 } else { 0.0 }).collect();
+    let sample_weight: Vec<f64> = y.iter().map(|yi| if *yi == 1.0 { 4.0 } else { 1.0 }).collect();
+    let info_sets = make_series("2019-01-01 00:00:00", n, 1);
+
+    let mut param_grid = BTreeMap::new();
+    param_grid.insert(
+        "threshold".to_string(),
+        vec![HyperParamValue::Float(0.5), HyperParamValue::Float(0.7), HyperParamValue::Float(0.9)],
+    );
+    param_grid.insert(
+        "sharpness".to_string(),
+        vec![HyperParamValue::Float(4.0), HyperParamValue::Float(8.0)],
+    );
+
+    let result = grid_search(
+        ThresholdClassifier::from_params,
+        &param_grid,
+        SearchData {
+            x: &x,
+            y: &y,
+            sample_weight: Some(&sample_weight),
+            samples_info_sets: &info_sets,
+        },
+        4,
+        0.02,
+        SearchScoring::NegLogLoss,
+    )
+    .unwrap();
+
+    assert_eq!(result.trials.len(), 6);
+    assert!(result.best_score.is_finite());
+
+    let best_threshold =
+        result.best_params.get("threshold").and_then(HyperParamValue::as_f64).unwrap();
+    assert!((best_threshold - 0.7).abs() < 1e-9);
+}
+
+#[test]
+fn test_randomized_search_seeded_deterministic_and_log_uniform() {
+    let n = 90usize;
+    let x: Vec<Vec<f64>> = (0..n).map(|i| vec![i as f64 / (n as f64 - 1.0)]).collect();
+    let y: Vec<f64> = x.iter().map(|v| if v[0] >= 0.65 { 1.0 } else { 0.0 }).collect();
+    let info_sets = make_series("2019-01-01 00:00:00", n, 1);
+
+    let mut param_space = BTreeMap::new();
+    param_space.insert(
+        "threshold".to_string(),
+        RandomParamDistribution::Uniform { low: 0.45, high: 0.85 },
+    );
+    param_space.insert(
+        "sharpness".to_string(),
+        RandomParamDistribution::LogUniform { low: 1e-1, high: 2e1 },
+    );
+
+    let run = || {
+        randomized_search(
+            ThresholdClassifier::from_params,
+            &param_space,
+            12,
+            42,
+            SearchData { x: &x, y:
&y, sample_weight: None, samples_info_sets: &info_sets },
+            3,
+            0.01,
+            SearchScoring::BalancedAccuracy,
+        )
+        .unwrap()
+    };
+
+    let first = run();
+    let second = run();
+    assert_eq!(first.best_params, second.best_params);
+    assert!((first.best_score - second.best_score).abs() < 1e-12);
+    assert_eq!(first.trials.len(), 12);
+    assert_eq!(first.trials, second.trials);
+
+    let mut rng = StdRng::seed_from_u64(7);
+    let s1 = sample_log_uniform(1e-3, 1e1, &mut rng).unwrap();
+    let s2 = sample_log_uniform(1e-3, 1e1, &mut rng).unwrap();
+    assert!(s1 >= 1e-3 && s1 <= 1e1);
+    assert!(s2 >= 1e-3 && s2 <= 1e1);
+    assert!((s1 - s2).abs() > 1e-12);
+}
+
+#[test]
+fn test_scoring_layer_handles_imbalance_weighted_neg_log_loss_and_metrics() {
+    // Strongly imbalanced labels: 95% class 0.
+    let mut y = vec![0.0; 95];
+    y.extend(vec![1.0; 5]);
+
+    // Majority-like predictions: high confidence toward class 0 for all points.
+    let probs = vec![0.1; 100];
+
+    let accuracy = classification_score(&y, &probs, None, SearchScoring::Accuracy).unwrap();
+    let balanced = classification_score(&y, &probs, None, SearchScoring::BalancedAccuracy).unwrap();
+    let unweighted_nll = classification_score(&y, &probs, None, SearchScoring::NegLogLoss).unwrap();
+
+    // Upweight minority class mistakes.
+    let weights: Vec<f64> = y.iter().map(|yi| if *yi == 1.0 { 20.0 } else { 1.0 }).collect();
+    let weighted_nll =
+        classification_score(&y, &probs, Some(&weights), SearchScoring::NegLogLoss).unwrap();
+
+    assert!(accuracy > 0.9);
+    assert!(balanced < accuracy);
+    // Weighting minority class should penalize this classifier's cross-entropy.
+ assert!(weighted_nll < unweighted_nll); +} diff --git a/docs-site/src/data/afmlDocsState.ts b/docs-site/src/data/afmlDocsState.ts index ba01d1a..1e39a90 100644 --- a/docs-site/src/data/afmlDocsState.ts +++ b/docs-site/src/data/afmlDocsState.ts @@ -142,6 +142,20 @@ export const afmlDocsState = { } ] }, + { + "chapter": "CHAPTER 9", + "theme": "Hyper-parameter tuning", + "status": "done", + "chunkCount": 17, + "sections": [ + { + "id": "chapter-9-hyperparameter_tuning", + "module": "hyperparameter_tuning", + "slug": "hyperparameter-tuning", + "status": "done" + } + ] + }, { "chapter": "CHAPTER 10", "theme": "Position sizing", diff --git a/docs-site/src/data/moduleDocs.ts b/docs-site/src/data/moduleDocs.ts index bf75fac..b6dc7d9 100644 --- a/docs-site/src/data/moduleDocs.ts +++ b/docs-site/src/data/moduleDocs.ts @@ -181,6 +181,49 @@ export const moduleDocs: ModuleDoc[] = [ ], notes: ["Threshold selection controls bar frequency and noise level.", "Keep OHLCV semantics consistent across downstream features."], }, + { + slug: "hyperparameter-tuning", + module: "hyperparameter_tuning", + subject: "Sampling, Validation and ML Diagnostics", + summary: "Leakage-aware grid/randomized hyper-parameter search with purged CV and weighted scoring.", + whyItExists: + "AFML Chapter 9 recommends tuning under PurgedKFold, using randomized search for large spaces, and scoring with metrics aligned to trading objectives.", + keyApis: [ + "grid_search", + "randomized_search", + "expand_param_grid", + "sample_log_uniform", + "classification_score", + "SearchScoring", + "RandomParamDistribution", + ], + formulas: [ + { + label: "Purged CV Objective", + latex: "\\hat\\theta=\\arg\\max_{\\theta\\in\\Theta}\\frac{1}{K}\\sum_{k=1}^{K}\\mathrm{Score}(f_\\theta,\\mathcal T_k^{train},\\mathcal T_k^{test})", + }, + { + label: "Log-Uniform Draw", + latex: "\\log x\\sim U(\\log a,\\log b),\\; a>0,\\;x\\in(a,b)", + }, + { + label: "Weighted Neg Log Loss", + latex: "-\\frac{1}{\\sum_i w_i}\\sum_i 
w_i\\left[y_i\\log p_i + (1-y_i)\\log(1-p_i)\\right]", + }, + ], + examples: [ + { + title: "Randomized search with PurgedKFold semantics", + language: "rust", + code: `use std::collections::BTreeMap;\nuse openquant::hyperparameter_tuning::{\n randomized_search, RandomParamDistribution, SearchData, SearchScoring,\n};\n\nlet mut space = BTreeMap::new();\nspace.insert(\"C\".to_string(), RandomParamDistribution::LogUniform { low: 1e-2, high: 1e2 });\nspace.insert(\"gamma\".to_string(), RandomParamDistribution::LogUniform { low: 1e-3, high: 1e1 });\n\nlet result = randomized_search(\n build_model,\n &space,\n 25,\n 42,\n SearchData { x: &x, y: &y, sample_weight: Some(&w), samples_info_sets: &info_sets },\n 5,\n 0.01,\n SearchScoring::NegLogLoss,\n)?;\nprintln!(\"best score = {}\", result.best_score);`, + }, + ], + notes: [ + "Use Accuracy only when each prediction has similar economic value (equal bet sizing).", + "Prefer weighted NegLogLoss when probabilities drive position sizing or outcomes have different economic magnitude.", + "BalancedAccuracy is useful for severe class imbalance, especially in meta-labeling where recall of positives matters.", + ], + }, { slug: "ef3m", module: "ef3m", diff --git a/docs-site/src/pages/api-reference.astro b/docs-site/src/pages/api-reference.astro index d5451db..bbba6db 100644 --- a/docs-site/src/pages/api-reference.astro +++ b/docs-site/src/pages/api-reference.astro @@ -19,6 +19,7 @@ import Layout from "../layouts/Layout.astro";
  • sampling::seq_bootstrap, sampling::get_ind_matrix, sampling::get_ind_mat_average_uniqueness
  • cross_validation::ml_cross_val_score, cross_validation::ml_get_train_times, cross_validation::PurgedKFold
+  • hyperparameter_tuning::grid_search, hyperparameter_tuning::randomized_search, hyperparameter_tuning::sample_log_uniform, hyperparameter_tuning::classification_score
  • feature_importance::mean_decrease_impurity, feature_importance::mean_decrease_accuracy, feature_importance::single_feature_importance
  • fingerprint::RegressionModelFingerprint, fingerprint::ClassificationModelFingerprint
  • sb_bagging::SequentiallyBootstrappedBaggingClassifier, sb_bagging::SequentiallyBootstrappedBaggingRegressor