From 04de628fbc86e1b9e4c2cf89953336779d1477cf Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 06:31:05 +0000
Subject: [PATCH 1/4] Initial plan


From 4e9aca58c5e3f1182a5973f1ebd310c4b7721e1c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 06:44:04 +0000
Subject: [PATCH 2/4] Add 6 CLI binaries for statistical model training and
 prediction

Add command-line utilities for stochastic forests, conditional random
forests, and conditional inference trees (train + predict for each).
All binaries use ArgumentHandler for CLI args, support CSV/TSV input
with auto-detection, and follow existing repository conventions.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 binaries/CMakeLists.txt                     |  79 ++++++-
 binaries/Ygor_CI_Tree_Predict.cc            | 109 ++++++++++
 binaries/Ygor_CI_Tree_Train.cc              | 162 +++++++++++++++
 binaries/Ygor_Conditional_Forest_Predict.cc | 109 ++++++++++
 binaries/Ygor_Conditional_Forest_Train.cc   | 217 ++++++++++++++++++++
 binaries/Ygor_Stochastic_Forest_Predict.cc  | 109 ++++++++++
 binaries/Ygor_Stochastic_Forest_Train.cc    | 193 +++++++++++++++++
 7 files changed, 977 insertions(+), 1 deletion(-)
 create mode 100644 binaries/Ygor_CI_Tree_Predict.cc
 create mode 100644 binaries/Ygor_CI_Tree_Train.cc
 create mode 100644 binaries/Ygor_Conditional_Forest_Predict.cc
 create mode 100644 binaries/Ygor_Conditional_Forest_Train.cc
 create mode 100644 binaries/Ygor_Stochastic_Forest_Predict.cc
 create mode 100644 binaries/Ygor_Stochastic_Forest_Train.cc

diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt
index 19c8483..00a45ac 100644
--- a/binaries/CMakeLists.txt
+++ b/binaries/CMakeLists.txt
@@ -48,13 +48,90 @@ target_link_libraries(parse_TAR_files
     Threads::Threads
 )
 
+add_executable(ygor_stochastic_forest_train
+    Ygor_Stochastic_Forest_Train.cc
+)
+target_include_directories(ygor_stochastic_forest_train
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_stochastic_forest_train
+    ygor
+    m
+    Threads::Threads
+)
+
+add_executable(ygor_stochastic_forest_predict
+    Ygor_Stochastic_Forest_Predict.cc
+)
+target_include_directories(ygor_stochastic_forest_predict
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_stochastic_forest_predict
+    ygor
+    m
+    Threads::Threads
+)
+
+add_executable(ygor_conditional_forest_train
+    Ygor_Conditional_Forest_Train.cc
+)
+target_include_directories(ygor_conditional_forest_train
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_conditional_forest_train
+    ygor
+    m
+    Threads::Threads
+)
+
+add_executable(ygor_conditional_forest_predict
+    Ygor_Conditional_Forest_Predict.cc
+)
+target_include_directories(ygor_conditional_forest_predict
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_conditional_forest_predict
+    ygor
+    m
+    Threads::Threads
+)
+
+add_executable(ygor_ci_tree_train
+    Ygor_CI_Tree_Train.cc
+)
+target_include_directories(ygor_ci_tree_train
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_ci_tree_train
+    ygor
+    m
+    Threads::Threads
+)
+
+add_executable(ygor_ci_tree_predict
+    Ygor_CI_Tree_Predict.cc
+)
+target_include_directories(ygor_ci_tree_predict
+    SYSTEM PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
+)
+target_link_libraries(ygor_ci_tree_predict
+    ygor
+    m
+    Threads::Threads
+)
+
 install(TARGETS fits_replace_nans
                 twot_pvalue
                 regex_tester
                 parse_TAR_files
+                ygor_stochastic_forest_train
+                ygor_stochastic_forest_predict
+                ygor_conditional_forest_train
+                ygor_conditional_forest_predict
+                ygor_ci_tree_train
+                ygor_ci_tree_predict
         ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
         LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
         RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
 )
 
-
diff --git a/binaries/Ygor_CI_Tree_Predict.cc b/binaries/Ygor_CI_Tree_Predict.cc
new file mode 100644
index 0000000..35dd04f
--- /dev/null
+++ b/binaries/Ygor_CI_Tree_Predict.cc
@@ -0,0 +1,109 @@
+//Ygor_CI_Tree_Predict.cc -- A command-line utility to predict using a trained conditional inference tree model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsCITrees.h"
+
+int main(int argc, char **argv){
+
+    std::string model_file;
+    std::string input_file;
+    bool has_header = false;
+
+    ArgumentHandler arger;
+    arger.description = "Predict using a trained conditional inference tree model.";
+
+    arger.push_back(std::make_tuple(1, 'm', "model", true, "<file>",
+        "Trained model file to load.",
+        [&](const std::string &optarg) -> void {
+            model_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file with feature values.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(model_file.empty()){
+        throw std::runtime_error("A model file must be specified via -m or --model.");
+    }
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+
+    // Load the model.
+    Stats::ConditionalInferenceTrees<double> model;
+    {
+        std::ifstream fm(model_file);
+        if(!fm.good()){
+            throw std::runtime_error("Unable to open model file '" + model_file + "'.");
+        }
+        if(!model.read_from(fm)){
+            throw std::runtime_error("Failed to read model from '" + model_file + "'.");
+        }
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(vals.empty()) continue;
+
+        const int64_t n_features = static_cast<int64_t>(vals.size());
+        num_array<double> x(1, n_features, 0.0);
+        for(int64_t c = 0; c < n_features; ++c){
+            x.coeff(0, c) = vals[c];
+        }
+
+        double prediction = model.predict(x);
+        std::cout << prediction << std::endl;
+    }
+
+    return 0;
+}
diff --git a/binaries/Ygor_CI_Tree_Train.cc b/binaries/Ygor_CI_Tree_Train.cc
new file mode 100644
index 0000000..721898f
--- /dev/null
+++ b/binaries/Ygor_CI_Tree_Train.cc
@@ -0,0 +1,162 @@
+//Ygor_CI_Tree_Train.cc -- A command-line utility to train a conditional inference tree model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsCITrees.h"
+
+int main(int argc, char **argv){
+
+    std::string input_file;
+    std::string output_file;
+    bool has_header = false;
+    int64_t max_depth = 10;
+    int64_t min_samples_split = 2;
+    double alpha = 0.05;
+    int64_t n_permutations = 1000;
+    uint64_t random_seed = 42;
+
+    ArgumentHandler arger;
+    arger.description = "Train a conditional inference tree model from tabular data (CSV/TSV).";
+
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file. Last column is the response variable.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'o', "output", true, "<file>",
+        "Output file for the trained model.",
+        [&](const std::string &optarg) -> void {
+            output_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+    arger.push_back(std::make_tuple(2, 'd', "max-depth", true, "<int>",
+        "Maximum tree depth (default: 10).",
+        [&](const std::string &optarg) -> void {
+            max_depth = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 's', "min-samples-split", true, "<int>",
+        "Minimum samples to split a node (default: 2).",
+        [&](const std::string &optarg) -> void {
+            min_samples_split = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'a', "alpha", true, "<float>",
+        "Significance level for conditional inference tests (default: 0.05).",
+        [&](const std::string &optarg) -> void {
+            alpha = std::stod(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'p', "n-permutations", true, "<int>",
+        "Number of permutations for hypothesis tests (default: 1000).",
+        [&](const std::string &optarg) -> void {
+            n_permutations = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'r', "random-seed", true, "<int>",
+        "Random seed (default: 42).",
+        [&](const std::string &optarg) -> void {
+            random_seed = std::stoull(optarg);
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+    if(output_file.empty()){
+        throw std::runtime_error("An output file must be specified via -o or --output.");
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::vector<std::vector<double>> rows;
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(!vals.empty()){
+            rows.push_back(vals);
+        }
+    }
+
+    if(rows.empty()){
+        throw std::runtime_error("No data rows found in input file.");
+    }
+
+    const int64_t n_rows = static_cast<int64_t>(rows.size());
+    const int64_t n_cols = static_cast<int64_t>(rows.front().size());
+    if(n_cols < 2){
+        throw std::runtime_error("Input must have at least two columns (features + response).");
+    }
+    const int64_t n_features = n_cols - 1;
+
+    num_array<double> X(n_rows, n_features, 0.0);
+    num_array<double> y(n_rows, 1, 0.0);
+    for(int64_t r = 0; r < n_rows; ++r){
+        if(static_cast<int64_t>(rows[r].size()) != n_cols){
+            throw std::runtime_error("Row " + std::to_string(r) + " has inconsistent number of columns.");
+        }
+        for(int64_t c = 0; c < n_features; ++c){
+            X.coeff(r, c) = rows[r][c];
+        }
+        y.coeff(r, 0) = rows[r][n_features];
+    }
+
+    std::cout << "Training conditional inference tree with " << n_rows << " samples and "
+              << n_features << " features." << std::endl;
+
+    // Train the model.
+    Stats::ConditionalInferenceTrees<double> model(max_depth, min_samples_split, alpha,
+                                                   n_permutations, random_seed);
+    model.fit(X, y);
+
+    // Save the model.
+    std::ofstream fo(output_file);
+    if(!fo.good()){
+        throw std::runtime_error("Unable to open output file '" + output_file + "'.");
+    }
+    if(!model.write_to(fo)){
+        throw std::runtime_error("Failed to write model to output file.");
+    }
+    std::cout << "Model saved to '" << output_file << "'." << std::endl;
+
+    return 0;
+}
diff --git a/binaries/Ygor_Conditional_Forest_Predict.cc b/binaries/Ygor_Conditional_Forest_Predict.cc
new file mode 100644
index 0000000..ff5c079
--- /dev/null
+++ b/binaries/Ygor_Conditional_Forest_Predict.cc
@@ -0,0 +1,109 @@
+//Ygor_Conditional_Forest_Predict.cc -- A command-line utility to predict using a trained conditional random forest model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsConditionalForests.h"
+
+int main(int argc, char **argv){
+
+    std::string model_file;
+    std::string input_file;
+    bool has_header = false;
+
+    ArgumentHandler arger;
+    arger.description = "Predict using a trained conditional random forest model.";
+
+    arger.push_back(std::make_tuple(1, 'm', "model", true, "<file>",
+        "Trained model file to load.",
+        [&](const std::string &optarg) -> void {
+            model_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file with feature values.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(model_file.empty()){
+        throw std::runtime_error("A model file must be specified via -m or --model.");
+    }
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+
+    // Load the model.
+    Stats::ConditionalRandomForests<double> model;
+    {
+        std::ifstream fm(model_file);
+        if(!fm.good()){
+            throw std::runtime_error("Unable to open model file '" + model_file + "'.");
+        }
+        if(!model.read_from(fm)){
+            throw std::runtime_error("Failed to read model from '" + model_file + "'.");
+        }
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(vals.empty()) continue;
+
+        const int64_t n_features = static_cast<int64_t>(vals.size());
+        num_array<double> x(1, n_features, 0.0);
+        for(int64_t c = 0; c < n_features; ++c){
+            x.coeff(0, c) = vals[c];
+        }
+
+        double prediction = model.predict(x);
+        std::cout << prediction << std::endl;
+    }
+
+    return 0;
+}
diff --git a/binaries/Ygor_Conditional_Forest_Train.cc b/binaries/Ygor_Conditional_Forest_Train.cc
new file mode 100644
index 0000000..fe07eab
--- /dev/null
+++ b/binaries/Ygor_Conditional_Forest_Train.cc
@@ -0,0 +1,217 @@
+//Ygor_Conditional_Forest_Train.cc -- A command-line utility to train a conditional random forest model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsConditionalForests.h"
+
+int main(int argc, char **argv){
+
+    std::string input_file;
+    std::string output_file;
+    bool has_header = false;
+    int64_t n_trees = 100;
+    int64_t max_depth = 10;
+    int64_t min_samples_split = 2;
+    double alpha = 0.05;
+    int64_t n_permutations = 1000;
+    int64_t max_features = -1;
+    double subsample_fraction = 0.632;
+    double correlation_threshold = 0.20;
+    uint64_t random_seed = 42;
+    std::string importance_str = "none";
+
+    ArgumentHandler arger;
+    arger.description = "Train a conditional random forest model from tabular data (CSV/TSV).";
+
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file. Last column is the response variable.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'o', "output", true, "<file>",
+        "Output file for the trained model.",
+        [&](const std::string &optarg) -> void {
+            output_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+    arger.push_back(std::make_tuple(2, 't', "n-trees", true, "<int>",
+        "Number of trees (default: 100).",
+        [&](const std::string &optarg) -> void {
+            n_trees = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'd', "max-depth", true, "<int>",
+        "Maximum tree depth (default: 10).",
+        [&](const std::string &optarg) -> void {
+            max_depth = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 's', "min-samples-split", true, "<int>",
+        "Minimum samples to split a node (default: 2).",
+        [&](const std::string &optarg) -> void {
+            min_samples_split = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'a', "alpha", true, "<float>",
+        "Significance level for conditional inference tests (default: 0.05).",
+        [&](const std::string &optarg) -> void {
+            alpha = std::stod(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'p', "n-permutations", true, "<int>",
+        "Number of permutations for hypothesis tests (default: 1000).",
+        [&](const std::string &optarg) -> void {
+            n_permutations = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'f', "max-features", true, "<int>",
+        "Maximum features per split; -1 for all (default: -1).",
+        [&](const std::string &optarg) -> void {
+            max_features = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'S', "subsample-fraction", true, "<float>",
+        "Fraction of samples to draw per tree (default: 0.632).",
+        [&](const std::string &optarg) -> void {
+            subsample_fraction = std::stod(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'c', "correlation-threshold", true, "<float>",
+        "Correlation threshold for conditional importance (default: 0.20).",
+        [&](const std::string &optarg) -> void {
+            correlation_threshold = std::stod(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'r', "random-seed", true, "<int>",
+        "Random seed (default: 42).",
+        [&](const std::string &optarg) -> void {
+            random_seed = std::stoull(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'I', "importance", true, "<method>",
+        "Feature importance method: none, permutation, or conditional (default: none).",
+        [&](const std::string &optarg) -> void {
+            importance_str = optarg;
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+    if(output_file.empty()){
+        throw std::runtime_error("An output file must be specified via -o or --output.");
+    }
+
+    // Parse the importance method.
+    Stats::ConditionalImportanceMethod importance_method = Stats::ConditionalImportanceMethod::none;
+    if(importance_str == "none"){
+        importance_method = Stats::ConditionalImportanceMethod::none;
+    }else if(importance_str == "permutation"){
+        importance_method = Stats::ConditionalImportanceMethod::permutation;
+    }else if(importance_str == "conditional"){
+        importance_method = Stats::ConditionalImportanceMethod::conditional;
+    }else{
+        throw std::runtime_error("Unknown importance method '" + importance_str + "'. Use none, permutation, or conditional.");
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::vector<std::vector<double>> rows;
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(!vals.empty()){
+            rows.push_back(vals);
+        }
+    }
+
+    if(rows.empty()){
+        throw std::runtime_error("No data rows found in input file.");
+    }
+
+    const int64_t n_rows = static_cast<int64_t>(rows.size());
+    const int64_t n_cols = static_cast<int64_t>(rows.front().size());
+    if(n_cols < 2){
+        throw std::runtime_error("Input must have at least two columns (features + response).");
+    }
+    const int64_t n_features = n_cols - 1;
+
+    num_array<double> X(n_rows, n_features, 0.0);
+    num_array<double> y(n_rows, 1, 0.0);
+    for(int64_t r = 0; r < n_rows; ++r){
+        if(static_cast<int64_t>(rows[r].size()) != n_cols){
+            throw std::runtime_error("Row " + std::to_string(r) + " has inconsistent number of columns.");
+        }
+        for(int64_t c = 0; c < n_features; ++c){
+            X.coeff(r, c) = rows[r][c];
+        }
+        y.coeff(r, 0) = rows[r][n_features];
+    }
+
+    std::cout << "Training conditional random forest with " << n_rows << " samples and "
+              << n_features << " features." << std::endl;
+
+    // Train the model.
+    Stats::ConditionalRandomForests<double> model(n_trees, max_depth, min_samples_split,
+                                                  alpha, n_permutations, max_features,
+                                                  subsample_fraction, correlation_threshold,
+                                                  random_seed);
+    model.set_importance_method(importance_method);
+    model.fit(X, y);
+
+    // Compute and display feature importances.
+    if(importance_method != Stats::ConditionalImportanceMethod::none){
+        model.compute_importance(X, y);
+        std::vector<double> importances = model.get_feature_importances();
+        std::cout << "Feature importances:" << std::endl;
+        for(int64_t c = 0; c < static_cast<int64_t>(importances.size()); ++c){
+            std::cout << "  feature " << c << ": " << importances[c] << std::endl;
+        }
+    }
+
+    // Save the model.
+    std::ofstream fo(output_file);
+    if(!fo.good()){
+        throw std::runtime_error("Unable to open output file '" + output_file + "'.");
+    }
+    if(!model.write_to(fo)){
+        throw std::runtime_error("Failed to write model to output file.");
+    }
+    std::cout << "Model saved to '" << output_file << "'." << std::endl;
+
+    return 0;
+}
diff --git a/binaries/Ygor_Stochastic_Forest_Predict.cc b/binaries/Ygor_Stochastic_Forest_Predict.cc
new file mode 100644
index 0000000..3aac094
--- /dev/null
+++ b/binaries/Ygor_Stochastic_Forest_Predict.cc
@@ -0,0 +1,109 @@
+//Ygor_Stochastic_Forest_Predict.cc -- A command-line utility to predict using a trained stochastic forest model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsStochasticForests.h"
+
+int main(int argc, char **argv){
+
+    std::string model_file;
+    std::string input_file;
+    bool has_header = false;
+
+    ArgumentHandler arger;
+    arger.description = "Predict using a trained stochastic forest model.";
+
+    arger.push_back(std::make_tuple(1, 'm', "model", true, "<file>",
+        "Trained model file to load.",
+        [&](const std::string &optarg) -> void {
+            model_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file with feature values.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(model_file.empty()){
+        throw std::runtime_error("A model file must be specified via -m or --model.");
+    }
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+
+    // Load the model.
+    Stats::StochasticForests<double> model;
+    {
+        std::ifstream fm(model_file);
+        if(!fm.good()){
+            throw std::runtime_error("Unable to open model file '" + model_file + "'.");
+        }
+        if(!model.read_from(fm)){
+            throw std::runtime_error("Failed to read model from '" + model_file + "'.");
+        }
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(vals.empty()) continue;
+
+        const int64_t n_features = static_cast<int64_t>(vals.size());
+        num_array<double> x(1, n_features, 0.0);
+        for(int64_t c = 0; c < n_features; ++c){
+            x.coeff(0, c) = vals[c];
+        }
+
+        double prediction = model.predict(x);
+        std::cout << prediction << std::endl;
+    }
+
+    return 0;
+}
diff --git a/binaries/Ygor_Stochastic_Forest_Train.cc b/binaries/Ygor_Stochastic_Forest_Train.cc
new file mode 100644
index 0000000..0c04822
--- /dev/null
+++ b/binaries/Ygor_Stochastic_Forest_Train.cc
@@ -0,0 +1,193 @@
+//Ygor_Stochastic_Forest_Train.cc -- A command-line utility to train a stochastic forest model.
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "YgorArguments.h"
+#include "YgorMath.h"
+#include "YgorStatsStochasticForests.h"
+
+int main(int argc, char **argv){
+
+    std::string input_file;
+    std::string output_file;
+    bool has_header = false;
+    int64_t n_trees = 100;
+    int64_t max_depth = 10;
+    int64_t min_samples_split = 2;
+    int64_t max_features = -1;
+    uint64_t random_seed = 42;
+    std::string importance_str = "none";
+
+    ArgumentHandler arger;
+    arger.description = "Train a stochastic forest model from tabular data (CSV/TSV).";
+
+    arger.push_back(std::make_tuple(1, 'i', "input", true, "<file>",
+        "Input CSV/TSV file. Last column is the response variable.",
+        [&](const std::string &optarg) -> void {
+            input_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'o', "output", true, "<file>",
+        "Output file for the trained model.",
+        [&](const std::string &optarg) -> void {
+            output_file = optarg;
+        }));
+    arger.push_back(std::make_tuple(1, 'H', "header", false, "",
+        "Indicate that the first row is a header (will be skipped).",
+        [&](const std::string &) -> void {
+            has_header = true;
+        }));
+    arger.push_back(std::make_tuple(2, 't', "n-trees", true, "<int>",
+        "Number of trees (default: 100).",
+        [&](const std::string &optarg) -> void {
+            n_trees = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'd', "max-depth", true, "<int>",
+        "Maximum tree depth (default: 10).",
+        [&](const std::string &optarg) -> void {
+            max_depth = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 's', "min-samples-split", true, "<int>",
+        "Minimum samples to split a node (default: 2).",
+        [&](const std::string &optarg) -> void {
+            min_samples_split = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'f', "max-features", true, "<int>",
+        "Maximum features per split; -1 for all (default: -1).",
+        [&](const std::string &optarg) -> void {
+            max_features = std::stoll(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'r', "random-seed", true, "<int>",
+        "Random seed (default: 42).",
+        [&](const std::string &optarg) -> void {
+            random_seed = std::stoull(optarg);
+        }));
+    arger.push_back(std::make_tuple(2, 'I', "importance", true, "<method>",
+        "Feature importance method: none, gini, or permutation (default: none).",
+        [&](const std::string &optarg) -> void {
+            importance_str = optarg;
+        }));
+
+    arger.Launch(argc, argv);
+
+    if(input_file.empty()){
+        throw std::runtime_error("An input file must be specified via -i or --input.");
+    }
+    if(output_file.empty()){
+        throw std::runtime_error("An output file must be specified via -o or --output.");
+    }
+
+    // Parse the importance method.
+    Stats::ImportanceMethod importance_method = Stats::ImportanceMethod::none;
+    if(importance_str == "none"){
+        importance_method = Stats::ImportanceMethod::none;
+    }else if(importance_str == "gini"){
+        importance_method = Stats::ImportanceMethod::gini;
+    }else if(importance_str == "permutation"){
+        importance_method = Stats::ImportanceMethod::permutation;
+    }else{
+        throw std::runtime_error("Unknown importance method '" + importance_str + "'. Use none, gini, or permutation.");
+    }
+
+    // Read the input file.
+    std::ifstream fi(input_file);
+    if(!fi.good()){
+        throw std::runtime_error("Unable to open input file '" + input_file + "'.");
+    }
+
+    std::vector<std::vector<double>> rows;
+    std::string line;
+    bool first_data_line = true;
+    char delimiter = ',';
+
+    while(std::getline(fi, line)){
+        if(line.empty()) continue;
+        if(has_header && first_data_line){
+            has_header = false;
+            // Auto-detect delimiter from header line.
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            continue;
+        }
+        if(first_data_line){
+            if(line.find('\t') != std::string::npos){
+                delimiter = '\t';
+            }
+            first_data_line = false;
+        }
+
+        std::vector<double> vals;
+        std::stringstream ss(line);
+        std::string token;
+        while(std::getline(ss, token, delimiter)){
+            vals.push_back(std::stod(token));
+        }
+        if(!vals.empty()){
+            rows.push_back(vals);
+        }
+    }
+
+    if(rows.empty()){
+        throw std::runtime_error("No data rows found in input file.");
+    }
+
+    const int64_t n_rows = static_cast<int64_t>(rows.size());
+    const int64_t n_cols = static_cast<int64_t>(rows.front().size());
+    if(n_cols < 2){
+        throw std::runtime_error("Input must have at least two columns (features + response).");
+    }
+    const int64_t n_features = n_cols - 1;
+
+    num_array<double> X(n_rows, n_features, 0.0);
+    num_array<double> y(n_rows, 1, 0.0);
+    for(int64_t r = 0; r < n_rows; ++r){
+        if(static_cast<int64_t>(rows[r].size()) != n_cols){
+            throw std::runtime_error("Row " + std::to_string(r) + " has inconsistent number of columns.");
+        }
+        for(int64_t c = 0; c < n_features; ++c){
+            X.coeff(r, c) = rows[r][c];
+        }
+        y.coeff(r, 0) = rows[r][n_features];
+    }
+
+    std::cout << "Training stochastic forest with " << n_rows << " samples and "
+              << n_features << " features." << std::endl;
+
+    // Train the model.
+    Stats::StochasticForests<double> model(n_trees, max_depth, min_samples_split, max_features, random_seed);
+    model.set_importance_method(importance_method);
+    model.fit(X, y);
+
+    // Compute and display feature importances.
+    if(importance_method == Stats::ImportanceMethod::permutation){
+        model.compute_permutation_importance(X, y);
+    }
+    if(importance_method != Stats::ImportanceMethod::none){
+        std::vector<double> importances = model.get_feature_importances();
+        std::cout << "Feature importances:" << std::endl;
+        for(int64_t c = 0; c < static_cast<int64_t>(importances.size()); ++c){
+            std::cout << "  feature " << c << ": " << importances[c] << std::endl;
+        }
+    }
+
+    // Save the model.
+    std::ofstream fo(output_file);
+    if(!fo.good()){
+        throw std::runtime_error("Unable to open output file '" + output_file + "'.");
+    }
+    if(!model.write_to(fo)){
+        throw std::runtime_error("Failed to write model to output file.");
+    }
+    std::cout << "Model saved to '" << output_file << "'." << std::endl;
+
+    return 0;
+}

From 9ad386d58478544bc6e6c0f75e8c3ebe52f51a3e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 06:49:33 +0000
Subject: [PATCH 3/4] Add CLI binaries for statistical models and serialization
 for CITrees

- Add write_to/read_from serialization to ConditionalInferenceTrees
- Create ygor_stochastic_forest_train and ygor_stochastic_forest_predict
- Create ygor_conditional_forest_train and ygor_conditional_forest_predict
- Create ygor_ci_tree_train and ygor_ci_tree_predict
- Update binaries/CMakeLists.txt to build and install all new binaries

Co-authored-by: hdclark <934858+hdclark@users.noreply.github.com>
Agent-Logs-Url: https://github.com/hdclark/Ygor/sessions/a5269fa4-9646-45da-9023-4ba7802d958c
---
 src/YgorStatsCITrees.cc | 153 ++++++++++++++++++++++++++++++++++++++++
 src/YgorStatsCITrees.h  |  30 ++++++++
 2 files changed, 183 insertions(+)

diff --git a/src/YgorStatsCITrees.cc b/src/YgorStatsCITrees.cc
index c13f7a0..500b41a 100644
--- a/src/YgorStatsCITrees.cc
+++ b/src/YgorStatsCITrees.cc
@@ -3,6 +3,7 @@
 #include <cmath>
 #include <cstdint>
 #include <vector>
+#include <string>
 #include <stdexcept>
 #include <limits>
 #include <numeric>
@@ -565,3 +566,155 @@ int64_t Stats::ConditionalInferenceTrees<T>::get_n_permutations() const {
     template int64_t Stats::ConditionalInferenceTrees<double>::get_n_permutations() const;
     template int64_t Stats::ConditionalInferenceTrees<float>::get_n_permutations() const;
 #endif
+
+
+template <class T>
+bool Stats::ConditionalInferenceTrees<T>::write_tree_node(std::ostream &os, const TreeNode *node) const {
+    if(node == nullptr){
+        return false;
+    }
+    if(node->is_leaf){
+        os << "L " << node->value << "\n";
+    }else{
+        os << "I " << node->split_feature << " " << node->split_threshold << "\n";
+        if(!write_tree_node(os, node->left.get())) return false;
+        if(!write_tree_node(os, node->right.get())) return false;
+    }
+    return (!os.fail());
+}
+#ifndef YGOR_STATS_CI_TREES_DISABLE_ALL_SPECIALIZATIONS
+    template bool Stats::ConditionalInferenceTrees<double>::write_tree_node(std::ostream &, const TreeNode *) const;
+    template bool Stats::ConditionalInferenceTrees<float>::write_tree_node(std::ostream &, const TreeNode *) const;
+#endif
+
+
+template <class T>
+std::unique_ptr<typename Stats::ConditionalInferenceTrees<T>::TreeNode>
+Stats::ConditionalInferenceTrees<T>::read_tree_node(std::istream &is) {
+    std::string node_type;
+    is >> node_type;
+    if(is.fail()) return nullptr;
+
+    auto node = std::make_unique<TreeNode>();
+    try{
+        if(node_type == "L"){
+            node->is_leaf = true;
+            std::string val_str;
+            is >> val_str;
+            if(is.fail()) return nullptr;
+            node->value = static_cast<T>(std::stold(val_str));
+        }else if(node_type == "I"){
+            node->is_leaf = false;
+            is >> node->split_feature;
+            std::string thresh_str;
+            is >> thresh_str;
+            if(is.fail()) return nullptr;
+            node->split_threshold = static_cast<T>(std::stold(thresh_str));
+            node->left = read_tree_node(is);
+            node->right = read_tree_node(is);
+            if(!node->left || !node->right) return nullptr;
+        }else{
+            return nullptr;
+        }
+    }catch(const std::invalid_argument &){
+        return nullptr;
+    }catch(const std::out_of_range &){
+        return nullptr;
+    }
+    return node;
+}
+#ifndef YGOR_STATS_CI_TREES_DISABLE_ALL_SPECIALIZATIONS
+    template std::unique_ptr<typename Stats::ConditionalInferenceTrees<double>::TreeNode>
+        Stats::ConditionalInferenceTrees<double>::read_tree_node(std::istream &);
+    template std::unique_ptr<typename Stats::ConditionalInferenceTrees<float>::TreeNode>
+        Stats::ConditionalInferenceTrees<float>::read_tree_node(std::istream &);
+#endif
+
+
+template <class T>
+bool Stats::ConditionalInferenceTrees<T>::write_to(std::ostream &os) const {
+    const auto original_precision = os.precision();
+    os.precision( std::numeric_limits<T>::max_digits10 );
+
+    // RAII guard to restore stream precision on all exit paths.
+    struct precision_guard {
+        std::ostream &s;
+        std::streamsize p;
+        ~precision_guard(){ s.precision(p); }
+    } guard{os, original_precision};
+
+    os << "ConditionalInferenceTrees_v1" << "\n";
+    os << "max_depth " << this->max_depth << "\n";
+    os << "min_samples_split " << this->min_samples_split << "\n";
+    os << "alpha " << this->alpha << "\n";
+    os << "n_permutations " << this->n_permutations << "\n";
+    os << "n_features_trained " << this->n_features_trained << "\n";
+    os << "random_seed " << this->random_seed << "\n";
+
+    // Write tree.
+    os << "begin_tree\n";
+    if(!write_tree_node(os, this->root.get())) return false;
+    os << "end_tree\n";
+
+    os.flush();
+    return (!os.fail());
+}
+#ifndef YGOR_STATS_CI_TREES_DISABLE_ALL_SPECIALIZATIONS
+    template bool Stats::ConditionalInferenceTrees<double>::write_to(std::ostream &) const;
+    template bool Stats::ConditionalInferenceTrees<float>::write_to(std::ostream &) const;
+#endif
+
+
+template <class T>
+bool Stats::ConditionalInferenceTrees<T>::read_from(std::istream &is) {
+    try{
+    std::string label;
+
+    // Read and validate header.
+    is >> label;
+    if(is.fail() || label != "ConditionalInferenceTrees_v1") return false;
+
+    // Read parameters.
+    is >> label >> this->max_depth;
+    if(is.fail() || label != "max_depth") return false;
+
+    is >> label >> this->min_samples_split;
+    if(is.fail() || label != "min_samples_split") return false;
+
+    {
+        std::string val_str;
+        is >> label >> val_str;
+        if(is.fail() || label != "alpha") return false;
+        this->alpha = static_cast<T>(std::stold(val_str));
+    }
+
+    is >> label >> this->n_permutations;
+    if(is.fail() || label != "n_permutations") return false;
+
+    is >> label >> this->n_features_trained;
+    if(is.fail() || label != "n_features_trained") return false;
+
+    is >> label >> this->random_seed;
+    if(is.fail() || label != "random_seed") return false;
+
+    // Read tree.
+    is >> label;
+    if(is.fail() || label != "begin_tree") return false;
+
+    this->root = read_tree_node(is);
+    if(!this->root) return false;
+
+    is >> label;
+    if(is.fail() || label != "end_tree") return false;
+
+    return (!is.fail());
+    }catch(const std::invalid_argument &){
+        return false;
+    }catch(const std::out_of_range &){
+        return false;
+    }
+}
+#ifndef YGOR_STATS_CI_TREES_DISABLE_ALL_SPECIALIZATIONS
+    template bool Stats::ConditionalInferenceTrees<double>::read_from(std::istream &);
+    template bool Stats::ConditionalInferenceTrees<float>::read_from(std::istream &);
+#endif
diff --git a/src/YgorStatsCITrees.h b/src/YgorStatsCITrees.h
index 9dc3269..3c3d273 100644
--- a/src/YgorStatsCITrees.h
+++ b/src/YgorStatsCITrees.h
@@ -13,6 +13,7 @@
 #define YGOR_STATS_CI_TREES_HDR_GRD_H
 
 #include <cstdint>
+#include <iostream>
 #include <vector>
 #include <random>
 #include <memory>
@@ -109,6 +110,10 @@ class ConditionalInferenceTrees {
         // Predict using the tree from a given node.
         T predict_tree(const TreeNode *node, const num_array<T> &x) const;
 
+        // Serialization helpers.
+        bool write_tree_node(std::ostream &os, const TreeNode *node) const;
+        std::unique_ptr<TreeNode> read_tree_node(std::istream &is);
+
     public:
         // Constructor.
         //
@@ -160,6 +165,31 @@ class ConditionalInferenceTrees {
 
         // Get the number of permutations.
         int64_t get_n_permutations() const;
+
+        // Write the model to a text stream.
+        //
+        // Serializes all data members, parameters, and tree structure to a human-readable
+        // text format. The model can be restored exactly using read_from() without any loss
+        // in function or accuracy. Floating point values are written with maximum precision.
+        //
+        // Parameters:
+        //   os: Output stream to write to.
+        //
+        // Returns:
+        //   true on success, false if the stream enters a fail state.
+        bool write_to(std::ostream &os) const;
+
+        // Read a model from a text stream.
+        //
+        // Restores a model previously written by write_to(). All parameters and tree
+        // structure are restored exactly.
+        //
+        // Parameters:
+        //   is: Input stream to read from.
+        //
+        // Returns:
+        //   true on success, false if the stream format is invalid or enters a fail state.
+        bool read_from(std::istream &is);
 };
 
 } //namespace Stats.

From c57bb9132ff10f78f1a3f5d17b85cf5cb74f27d2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 25 Mar 2026 06:51:17 +0000
Subject: [PATCH 4/4] Refactor CSV/TSV header parsing to avoid confusing
 mutation of has_header flag

Co-authored-by: hdclark <934858+hdclark@users.noreply.github.com>
Agent-Logs-Url: https://github.com/hdclark/Ygor/sessions/a5269fa4-9646-45da-9023-4ba7802d958c
---
 binaries/Ygor_CI_Tree_Predict.cc            | 11 ++++-------
 binaries/Ygor_CI_Tree_Train.cc              | 11 ++++-------
 binaries/Ygor_Conditional_Forest_Predict.cc | 11 ++++-------
 binaries/Ygor_Conditional_Forest_Train.cc   | 11 ++++-------
 binaries/Ygor_Stochastic_Forest_Predict.cc  | 11 ++++-------
 binaries/Ygor_Stochastic_Forest_Train.cc    | 13 +++++--------
 6 files changed, 25 insertions(+), 43 deletions(-)

diff --git a/binaries/Ygor_CI_Tree_Predict.cc b/binaries/Ygor_CI_Tree_Predict.cc
index 35dd04f..264591e 100644
--- a/binaries/Ygor_CI_Tree_Predict.cc
+++ b/binaries/Ygor_CI_Tree_Predict.cc
@@ -73,17 +73,14 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }
 
diff --git a/binaries/Ygor_CI_Tree_Train.cc b/binaries/Ygor_CI_Tree_Train.cc
index 721898f..1bf35ab 100644
--- a/binaries/Ygor_CI_Tree_Train.cc
+++ b/binaries/Ygor_CI_Tree_Train.cc
@@ -92,17 +92,14 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }
 
diff --git a/binaries/Ygor_Conditional_Forest_Predict.cc b/binaries/Ygor_Conditional_Forest_Predict.cc
index ff5c079..e232f27 100644
--- a/binaries/Ygor_Conditional_Forest_Predict.cc
+++ b/binaries/Ygor_Conditional_Forest_Predict.cc
@@ -73,17 +73,14 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }
 
diff --git a/binaries/Ygor_Conditional_Forest_Train.cc b/binaries/Ygor_Conditional_Forest_Train.cc
index fe07eab..633bae1 100644
--- a/binaries/Ygor_Conditional_Forest_Train.cc
+++ b/binaries/Ygor_Conditional_Forest_Train.cc
@@ -134,17 +134,14 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }
 
diff --git a/binaries/Ygor_Stochastic_Forest_Predict.cc b/binaries/Ygor_Stochastic_Forest_Predict.cc
index 3aac094..48659a3 100644
--- a/binaries/Ygor_Stochastic_Forest_Predict.cc
+++ b/binaries/Ygor_Stochastic_Forest_Predict.cc
@@ -73,17 +73,14 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }
 
diff --git a/binaries/Ygor_Stochastic_Forest_Train.cc b/binaries/Ygor_Stochastic_Forest_Train.cc
index 0c04822..a84bb2a 100644
--- a/binaries/Ygor_Stochastic_Forest_Train.cc
+++ b/binaries/Ygor_Stochastic_Forest_Train.cc
@@ -110,18 +110,15 @@ int main(int argc, char **argv){
 
     while(std::getline(fi, line)){
         if(line.empty()) continue;
-        if(has_header && first_data_line){
-            has_header = false;
-            // Auto-detect delimiter from header line.
-            if(line.find('\t') != std::string::npos){
-                delimiter = '\t';
-            }
-            continue;
-        }
         if(first_data_line){
+            // Auto-detect delimiter from first non-empty line.
             if(line.find('\t') != std::string::npos){
                 delimiter = '\t';
             }
+            if(has_header){
+                first_data_line = false;
+                continue;
+            }
             first_data_line = false;
         }