diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e58b7fb --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.DS_Store +__pycache__/ +*.py[cod] + +.venv/ +.venv311/ +.venv313/ + +model/ +holdout_outputs/ +training_data/ +holdout_data/ +scores.txt diff --git a/evaluate_model.py b/evaluate_model.py index 6fc9466..e9eb53c 100644 --- a/evaluate_model.py +++ b/evaluate_model.py @@ -83,10 +83,10 @@ def standardize_bool(val): label = df_labels.loc[patient, id_labels] labels[i] = label if patient in df_predictions.index: # Set missing predictions to 0. - binary_prediction = float(df_predictions.loc[patient, id_binary_predictions]) + binary_prediction = float(np.real(df_predictions.loc[patient, id_binary_predictions])) if binary_prediction == 0 or binary_prediction == 1: # Set invalid binary predictions to 0. binary_predictions[i] = binary_prediction - probability_prediction = float(df_predictions.loc[patient, id_probability_predictions]) + probability_prediction = float(np.real(df_predictions.loc[patient, id_probability_predictions])) if np.isfinite(probability_prediction): # Set invalid probability predictions to 0. probability_predictions[i] = probability_prediction diff --git a/team_code.py b/team_code.py index 910c4ed..280a8dd 100644 --- a/team_code.py +++ b/team_code.py @@ -3,544 +3,1040 @@ # Edit this script to add your team's code. Some functions are *required*, but you can edit most parts of the required functions, # change or remove non-required functions, and add your own functions. -################################################################################ -# -# Optional libraries, functions, and variables. You can change or remove them. 
-# -################################################################################ - -import joblib -import numpy as np import os -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor import sys + +os.environ.setdefault("LOKY_MAX_CPU_COUNT", "1") + +try: + import joblib +except ModuleNotFoundError as e: + raise ModuleNotFoundError( + "No module named 'joblib'. This usually means you're running a different Python interpreter than the one you installed packages into.\n\n" + f"Active interpreter: {sys.executable}\n\n" + "Fix by installing dependencies into *this* interpreter, e.g.:\n" + f" {sys.executable} -m pip install -r requirements.txt\n" + ) from e + +import numpy as np +import pandas as pd +from scipy.signal import welch +from sklearn.base import clone +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ExtraTreesClassifier, HistGradientBoostingClassifier, RandomForestClassifier, VotingClassifier +from sklearn.impute import SimpleImputer +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import balanced_accuracy_score, roc_auc_score +from sklearn.model_selection import GroupKFold, StratifiedKFold, cross_val_score +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler from tqdm import tqdm -from helper_code import * +from helper_code import ( + ALGORITHMIC_ANNOTATIONS_SUBFOLDER, + DEMOGRAPHICS_FILE, + HEADERS, + PHYSIOLOGICAL_DATA_SUBFOLDER, + derive_bipolar_signal, + find_patients, + load_rename_rules, + load_signal_data, + standardize_channel_names_rename_only, +) ################################################################################ -# Path & Constant Configuration (Added for Robustness) +# +# Configuration +# ################################################################################ -# Get the absolute directory where this script is located SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) - -# Build the absolute path to the CSV file 
relative to the script location -DEFAULT_CSV_PATH = os.path.join(SCRIPT_DIR, 'channel_table.csv') +DEFAULT_CSV_PATH = os.path.join(SCRIPT_DIR, "channel_table.csv") +DEFAULT_RANDOM_STATE = 56 +MAX_SIGNAL_SAMPLES = 120_000 +SPECTRAL_MIN_SAMPLES = 128 +MODEL_THRESHOLD = 0.5 +USE_PHYSIOLOGY_FEATURES = True +MAX_CV_SPLITS = 5 +MODEL_SELECTION_TOLERANCE = 0.01 +ENABLED_PHYSIOLOGY_MODALITIES = ("eeg", "eog", "ecg", "resp", "spo2") + +MODALITY_CANDIDATES = ( + ("eeg", ("f3-m2", "f4-m1", "c3-m2", "c4-m1", "o1-m2", "o2-m1")), + ("eog", ("e1-m2", "e2-m1")), + ("chin", ("chin1-chin2", "chin")), + ("leg", ("lat", "rat")), + ("ecg", ("ecg", "ekg", "hr")), + ("resp", ("airflow", "ptaf", "abd", "chest")), + ("spo2", ("spo2", "sao2")), +) + +BIPOLAR_CONFIGS = ( + ("f3-m2", "f3", ("m2",)), + ("f4-m1", "f4", ("m1",)), + ("c3-m2", "c3", ("m2",)), + ("c4-m1", "c4", ("m1",)), + ("o1-m2", "o1", ("m2",)), + ("o2-m1", "o2", ("m1",)), + ("e1-m2", "e1", ("m2",)), + ("e2-m1", "e2", ("m1",)), + ("chin1-chin2", "chin 1", ("chin 2",)), + ("lat", "lleg+", ("lleg-",)), + ("rat", "rleg+", ("rleg-",)), +) + +DEMOGRAPHIC_FEATURE_DIM = 15 +SIGNAL_SUMMARY_DIM = 15 +ACTIVE_MODALITY_CANDIDATES = tuple( + (name, candidates) for name, candidates in MODALITY_CANDIDATES if name in ENABLED_PHYSIOLOGY_MODALITIES +) +PHYSIOLOGICAL_FEATURE_DIM = len(ACTIVE_MODALITY_CANDIDATES) * SIGNAL_SUMMARY_DIM if USE_PHYSIOLOGY_FEATURES else 0 +ALGORITHMIC_FEATURE_DIM = 18 ################################################################################ # -# Required functions. Edit these functions to add your code, but do not change the arguments for the functions. +# Required functions # ################################################################################ -# Train your models. This function is *required*. You should edit this function to add your code, but do *not* change the arguments -# of this function. If you do not train one of the models, then you can return None for the model. - -# Train your model. 
def train_model(data_folder, model_folder, verbose, csv_path=DEFAULT_CSV_PATH):
    """Build features/labels for every patient session and fit the classifier.

    Parameters
    ----------
    data_folder : str
        Root folder holding demographics.csv and the signal subfolders.
    model_folder : str
        Output folder; the fitted artifact is written here via ``save_model``.
    verbose : bool
        When True, progress and selection results are printed.
    csv_path : str
        Path to the channel-rename table forwarded to feature extraction.
    """
    if verbose:
        print("Finding the Challenge data...")

    demographics_file = os.path.join(data_folder, DEMOGRAPHICS_FILE)
    demographics_df = pd.read_csv(demographics_file)
    patient_metadata_list = find_patients(demographics_file)
    if not patient_metadata_list:
        raise FileNotFoundError("No data were provided.")

    metadata_lookup = build_metadata_lookup(demographics_df)

    if verbose:
        print("Extracting features and labels from the data...")

    feature_rows = []
    label_values = []
    group_ids = []

    progress = tqdm(patient_metadata_list, desc="Extracting Features", unit="record", disable=not verbose)
    for record in progress:
        patient_id = normalize_identifier(record.get(HEADERS["bids_folder"]))
        site_id = normalize_identifier(record.get(HEADERS["site_id"]))
        session_id = normalize_identifier(record.get(HEADERS["session_id"]))

        if verbose:
            progress.set_postfix({"patient": patient_id})

        try:
            metadata = metadata_lookup.get(make_record_key(site_id, patient_id, session_id), {})
            feature_vector = build_feature_vector(
                metadata=metadata,
                data_folder=data_folder,
                site_id=site_id,
                patient_id=patient_id,
                session_id=session_id,
                csv_path=csv_path,
            )
            label = extract_label(metadata)

            # Only records with a definite 0/1 label contribute to training.
            if label in (0, 1):
                feature_rows.append(feature_vector)
                label_values.append(label)
                group_ids.append(make_group_id(site_id, patient_id))
        except Exception as exc:
            # One corrupt record must not abort the whole training run.
            tqdm.write(f" !!! Error processing record {patient_id} (session {session_id}): {exc}")
            continue

    if len(feature_rows) == 0:
        raise ValueError(
            "No usable training examples were found. "
            "Check that demographics.csv is present and contains valid labels."
        )

    feature_matrix = np.asarray(feature_rows, dtype=np.float32)
    label_array = np.asarray(label_values, dtype=np.int8)
    group_array = np.asarray(group_ids, dtype=object)

    if verbose:
        print("Selecting and training the model...")

    model_name, model, cv_auc, threshold = select_and_fit_model(feature_matrix, label_array, group_array, verbose)

    # Persist the estimator together with everything run_model needs.
    artifact = {
        "model": model,
        "model_name": model_name,
        "cv_auc": cv_auc,
        "threshold": threshold,
        "feature_dim": int(feature_matrix.shape[1]),
        "feature_version": 2,
    }

    os.makedirs(model_folder, exist_ok=True)
    save_model(model_folder, artifact)

    if verbose:
        if cv_auc is None:
            print(f"Selected model: {model_name}")
        else:
            print(f"Selected model: {model_name} (mean CV AUROC {cv_auc:.4f})")
        print(f"Decision threshold: {threshold:.3f}")
        print("Done.")
        print()
def load_model(model_folder, verbose):
    """Deserialize and return the artifact saved by ``train_model``."""
    return joblib.load(os.path.join(model_folder, "model.sav"))
def run_model(model, record, data_folder, verbose):
    """Return (binary_prediction, probability_prediction) for one record.

    Accepts either the artifact dict produced by ``train_model`` or a bare
    estimator (backward compatibility with older saved models).
    """
    if isinstance(model, dict):
        bundle = model
    else:
        bundle = {"model": model, "threshold": MODEL_THRESHOLD}
    estimator = bundle.get("model", bundle)
    cutoff = float(bundle.get("threshold", MODEL_THRESHOLD))

    patient_id = normalize_identifier(record.get(HEADERS["bids_folder"]))
    site_id = normalize_identifier(record.get(HEADERS["site_id"]))
    session_id = normalize_identifier(record.get(HEADERS["session_id"]))

    metadata = load_metadata_row(
        os.path.join(data_folder, DEMOGRAPHICS_FILE), site_id, patient_id, session_id
    )

    feature_row = build_feature_vector(
        metadata=metadata,
        data_folder=data_folder,
        site_id=site_id,
        patient_id=patient_id,
        session_id=session_id,
    ).reshape(1, -1)

    return predict_binary_and_probability(estimator, feature_row, cutoff)
def build_feature_vector(metadata, data_folder, site_id, patient_id, session_id, csv_path=DEFAULT_CSV_PATH):
    """Assemble the full per-session feature vector.

    Concatenates demographic, physiological, and algorithmic-annotation
    features; any missing data source contributes a zero block of the
    appropriate fixed dimension, so the output length is constant.
    """
    demographic_features = extract_demographic_features(metadata)

    # Physiological block: zeros unless the modality is enabled and the EDF exists.
    physiological_features = np.zeros(PHYSIOLOGICAL_FEATURE_DIM, dtype=np.float32)
    if USE_PHYSIOLOGY_FEATURES:
        edf_path = os.path.join(
            data_folder,
            PHYSIOLOGICAL_DATA_SUBFOLDER,
            str(site_id),
            f"{patient_id}_ses-{session_id}.edf",
        )
        if os.path.exists(edf_path):
            signals, sampling_rates = load_signal_data(edf_path)
            physiological_features = extract_physiological_features(
                signals, sampling_rates, csv_path=csv_path
            )

    # Algorithmic-annotation block: zeros when the CAISR file is absent.
    algorithmic_features = np.zeros(ALGORITHMIC_FEATURE_DIM, dtype=np.float32)
    annotations_path = os.path.join(
        data_folder,
        ALGORITHMIC_ANNOTATIONS_SUBFOLDER,
        str(site_id),
        f"{patient_id}_ses-{session_id}_caisr_annotations.edf",
    )
    if os.path.exists(annotations_path):
        annotations, _ = load_signal_data(annotations_path)
        algorithmic_features = extract_algorithmic_annotations_features(annotations)

    combined = np.concatenate([demographic_features, physiological_features, algorithmic_features])
    return sanitize_feature_vector(combined)
def extract_demographic_features(data):
    """Encode demographics into a fixed 15-dimensional float32 vector.

    Layout: [age, bmi, age_missing, bmi_missing] + one-hot sex (3) +
    one-hot race (5) + one-hot ethnicity (3). Missing continuous values
    are imputed with 0 while the companion missingness flags preserve the
    information for the model.

    Fix: race/ethnicity previously used hard dict indexing ({...}[race]),
    which raises KeyError on any category the standardizer does not map;
    the sex encoding already used a defensive .get() fallback. All three
    now fall back to their "unknown/unavailable" bucket consistently.
    """
    age = safe_float(data.get(HEADERS["age"]))
    bmi = safe_float(data.get(HEADERS["bmi"]))
    age_missing = float(np.isnan(age))
    bmi_missing = float(np.isnan(bmi))

    numeric = np.array(
        [
            0.0 if np.isnan(age) else age,
            0.0 if np.isnan(bmi) else bmi,
            age_missing,
            bmi_missing,
        ],
        dtype=np.float32,
    )

    sex_vec = np.zeros(3, dtype=np.float32)
    sex = standardize_sex(data.get(HEADERS["sex"]))
    sex_vec[{"female": 0, "male": 1}.get(sex, 2)] = 1.0

    race_vec = np.zeros(5, dtype=np.float32)
    race = standardize_race(data.get(HEADERS["race"]))
    # Unexpected categories map to the "unavailable" bucket (index 3).
    race_vec[{"asian": 0, "black": 1, "other": 2, "unavailable": 3, "white": 4}.get(race, 3)] = 1.0

    ethnicity_vec = np.zeros(3, dtype=np.float32)
    ethnicity = standardize_ethnicity(data.get(HEADERS["ethnicity"]))
    # Unexpected categories map to the "unavailable" bucket (index 2).
    ethnicity_vec[{"hispanic": 0, "not_hispanic": 1, "unavailable": 2}.get(ethnicity, 2)] = 1.0

    return np.concatenate([numeric, sex_vec, race_vec, ethnicity_vec]).astype(np.float32)
- """ - original_labels = list(physiological_data.keys()) + if not physiological_data: + return np.zeros(PHYSIOLOGICAL_FEATURE_DIM, dtype=np.float32) - # Step 1: Load rules and standardize names - # Note: Use script-relative path or absolute path for robustness rename_rules = load_rename_rules(os.path.abspath(csv_path)) + original_labels = list(physiological_data.keys()) rename_map, cols_to_drop = standardize_channel_names_rename_only(original_labels, rename_rules) - # Step 2: Apply renaming to BOTH signals and their corresponding FS processed_channels = {} processed_fs = {} - for old_label, data in physiological_data.items(): + + for old_label, signal in physiological_data.items(): if old_label in cols_to_drop: continue + + if old_label not in physiological_fs: + continue + new_label = rename_map.get(old_label, old_label.lower()) - processed_channels[new_label] = data - # Mapping the sampling rate to the new label - if old_label in physiological_fs: - processed_fs[new_label] = physiological_fs[old_label] - else: - # Report error and stop if no FS is found for a kept channel - raise KeyError(f"Sampling frequency (fs) not found for channel '{old_label}' ") - - if 'physiological_data' in locals(): del physiological_data - - # Step 3: Construct Bipolar Derivations - bipolar_configs = [ - ('f3-m2', 'f3', ['m2']), ('f4-m1', 'f4', ['m1']), - ('c3-m2', 'c3', ['m2']), ('c4-m1', 'c4', ['m1']), - ('o1-m2', 'o1', ['m2']), ('o2-m1', 'o2', ['m1']), - ('e1-m2', 'e1', ['m2']), ('e2-m1', 'e2', ['m1']), - ('chin1-chin2', 'chin 1', ['chin 2']), - ('lat', 'lleg+', ['lleg-']), ('rat', 'rleg+', ['rleg-']) - ] + processed_channels[new_label] = np.asarray(signal, dtype=np.float64) + processed_fs[new_label] = float(physiological_fs[old_label]) - for target, pos, neg_list in bipolar_configs: - # 1. 
Skip if target already exists or pos channel missing - if target in processed_channels or pos not in processed_channels: + for target, pos_label, neg_labels in BIPOLAR_CONFIGS: + if target in processed_channels or pos_label not in processed_channels: continue - - # 2. Check all neg channels exist - if not all(n in processed_channels for n in neg_list): + if not all(label in processed_channels for label in neg_labels): continue - # 3. Check sampling rate consistency - all_involved = [pos] + neg_list - fs_values = [processed_fs[ch] for ch in all_involved] - - if len(set(fs_values)) > 1: - raise ValueError(f"Sampling rate mismatch for {target}: {dict(zip(all_involved, fs_values))}") - - # 4. Derive bipolar signal - ref_sig = processed_channels[neg_list[0]] if len(neg_list) == 1 else tuple(processed_channels[n] for n in neg_list) - - derived = derive_bipolar_signal(processed_channels[pos], ref_sig) - - if derived is not None: - processed_channels[target] = derived - processed_fs[target] = processed_fs[pos] - - leads_to_check = { - 'eeg': ['f3-m2', 'f4-m1', 'c3-m2', 'c4-m1'], - 'eog': ['e1-m2', 'e2-m1'], - 'chin': ['chin1-chin2', 'chin'], - 'leg': ['lat', 'rat'], - 'ecg': ['ecg', 'ekg'], - 'resp': ['airflow', 'ptaf', 'abd', 'chest'], - 'spo2': ['spo2', 'sao2'] # Added sao2 as fallback for spo2 - } - - final_features = [] - for lead_type, candidates in leads_to_check.items(): - sig = None - fs = None - - # Identify the first available candidate - for candidate in candidates: - if candidate in processed_channels and processed_channels[candidate] is not None: - sig = processed_channels[candidate] - fs = processed_fs.get(candidate) - break - - # if sig is not None and len(sig) > 0 and fs is not None: - # # --- 1. Time Domain Features --- - # std_val = np.std(sig) - # mav_val = np.mean(np.abs(sig)) - # energy_val = np.sum(sig**2) / len(sig) - - # # --- 2. 
Frequency Domain Features (Spectral) --- - # n = len(sig) - # # Correct spacing for frequency axis based on channel-specific fs - # freqs = np.fft.rfftfreq(n, d=1/fs) - - # # Compute Power Spectral Density (PSD) - # # Multiplied by 2 for rfft (except DC/Nyquist) and divided by fs for density - # fft_res = np.abs(np.fft.rfft(sig)) - # psd = (fft_res**2) / (n * fs) - - # # Define band masks - # delta_mask = (freqs >= 0.5) & (freqs <= 4) - # theta_mask = (freqs > 4) & (freqs <= 8) - # alpha_mask = (freqs > 8) & (freqs <= 12) - - # # Calculate power in bands using trapezoidal integration for physical accuracy - # delta_p = np.trapezoid(psd[delta_mask], freqs[delta_mask]) if np.any(delta_mask) else 0.0 - # theta_p = np.trapezoid(psd[theta_mask], freqs[theta_mask]) if np.any(theta_mask) else 0.0 - # alpha_p = np.trapezoid(psd[alpha_mask], freqs[alpha_mask]) if np.any(alpha_mask) else 0.0 - - # # Ratio biomarker: Delta/Theta (Indicator of cognitive slowing) - # dt_ratio = delta_p / theta_p if theta_p > 0 else 0.0 - - # final_features.extend([std_val, mav_val, energy_val, delta_p, theta_p, alpha_p, dt_ratio]) - - if sig is not None and len(sig) > 1: - # --- Time Domain Features (Very Fast) --- - std_val = np.std(sig) - mav_val = np.mean(np.abs(sig)) - - # Zero Crossing Rate (Proxy for frequency/slowing) - zcr = np.mean(np.diff(np.sign(sig)) != 0) - - # Root Mean Square - rms = np.sqrt(np.mean(sig**2)) - - # Signal Activity (Variance) - activity = np.var(sig) - - # Mobility (Hjorth Parameter) - Proxy for mean frequency - # sqrt(var(diff(sig)) / var(sig)) - diff_sig = np.diff(sig) - mobility = np.sqrt(np.var(diff_sig) / activity) if activity > 0 else 0.0 - - # Complexity (Hjorth Parameter) - Proxy for bandwidth - diff2_sig = np.diff(diff_sig) - var_d2 = np.var(diff2_sig) - var_d1 = np.var(diff_sig) - complexity = (np.sqrt(var_d2 / var_d1) / mobility) if (var_d1 > 0 and mobility > 0) else 0.0 - - final_features.extend([std_val, mav_val, zcr, rms, activity, mobility, 
complexity]) + fs_values = [processed_fs.get(pos_label)] + [processed_fs.get(label) for label in neg_labels] + if any(value is None for value in fs_values): + continue + if len({float(value) for value in fs_values}) > 1: + continue + + reference = processed_channels[neg_labels[0]] if len(neg_labels) == 1 else tuple(processed_channels[label] for label in neg_labels) + derived_signal = derive_bipolar_signal(processed_channels[pos_label], reference) + if derived_signal is None: + continue + processed_channels[target] = np.asarray(derived_signal, dtype=np.float64) + processed_fs[target] = float(fs_values[0]) + + features = [] + for _, candidates in ACTIVE_MODALITY_CANDIDATES: + summaries = [] + for candidate in candidates: + if candidate not in processed_channels: + continue + summary = summarize_signal(processed_channels[candidate], processed_fs.get(candidate)) + if np.any(summary): + summaries.append(summary) + + if summaries: + modality_summary = np.mean(np.vstack(summaries), axis=0) + availability = min(len(summaries), 3) / 3.0 + features.extend(modality_summary.tolist()) + features.append(float(availability)) else: - # Padding: 7 features per lead type - final_features.extend([0.0] * 7) + features.extend([0.0] * (SIGNAL_SUMMARY_DIM - 1)) + features.append(0.0) + + return sanitize_feature_vector(np.asarray(features, dtype=np.float32)) - if 'processed_channels' in locals(): del processed_channels - return np.array(final_features) +def summarize_signal(signal, fs): + values = np.asarray(signal, dtype=np.float64).reshape(-1) + values = values[np.isfinite(values)] + + if values.size < 2: + return np.zeros(SIGNAL_SUMMARY_DIM - 1, dtype=np.float32) + + step = max(1, int(np.ceil(values.size / MAX_SIGNAL_SAMPLES))) + sampled = values[::step] + effective_fs = float(fs) / step if fs not in (None, 0) else 0.0 + + median = np.median(sampled) + centered = sampled - median + diff = np.diff(centered) + + q05, q25, q50, q75, q95 = np.percentile(sampled, [5, 25, 50, 75, 95]) + 
def summarize_signal(signal, fs):
    """Fixed-length (SIGNAL_SUMMARY_DIM - 1) statistical summary of one trace.

    NOTE(review): assumes `fs` is the channel sampling rate in Hz — spectral
    features are skipped when it is unknown/zero or the trace is too short.
    """
    flat = np.asarray(signal, dtype=np.float64).reshape(-1)
    flat = flat[np.isfinite(flat)]
    if flat.size < 2:
        return np.zeros(SIGNAL_SUMMARY_DIM - 1, dtype=np.float32)

    # Decimate very long traces so the summary cost stays bounded.
    stride = max(1, int(np.ceil(flat.size / MAX_SIGNAL_SAMPLES)))
    xs = flat[::stride]
    fs_eff = float(fs) / stride if fs not in (None, 0) else 0.0

    centered = xs - np.median(xs)
    d1 = np.diff(centered)
    d2 = np.diff(d1)

    q05, q25, q50, q75, q95 = np.percentile(xs, [5, 25, 50, 75, 95])
    var0 = np.var(centered)
    var1 = np.var(d1) if d1.size else 0.0
    var2 = np.var(d2) if d2.size else 0.0

    # Hjorth mobility/complexity, guarded against degenerate variances.
    mobility = np.sqrt(var1 / var0) if var0 > 0 and var1 > 0 else 0.0
    complexity = np.sqrt(var2 / var1) / mobility if var1 > 0 and mobility > 0 else 0.0

    zero_cross = np.mean((centered[:-1] * centered[1:]) < 0) if centered.size > 1 else 0.0
    line_length = np.mean(np.abs(d1)) if d1.size else 0.0
    mad = np.median(np.abs(xs - q50))
    rms = np.sqrt(np.mean(xs ** 2))

    peak_freq = 0.0
    spec_entropy = 0.0
    if fs_eff > 0 and xs.size >= SPECTRAL_MIN_SAMPLES:
        try:
            freqs, pxx = welch(centered, fs=fs_eff, nperseg=min(1024, xs.size))
            pxx = np.maximum(np.asarray(pxx, dtype=np.float64), 0.0)
            total = pxx.sum()
            if total > 0 and freqs.size > 0:
                peak_freq = float(freqs[np.argmax(pxx)])
                pmf = pxx / total
                nz = pmf > 0
                # Normalized spectral entropy in [0, 1].
                spec_entropy = float(-(pmf[nz] * np.log(pmf[nz])).sum() / np.log(pmf.size))
        except Exception:
            # Best effort: fall back to zeros if the spectral estimate fails.
            peak_freq = 0.0
            spec_entropy = 0.0

    stats = np.array(
        [
            np.std(xs),
            mad,
            q75 - q25,
            np.mean(np.abs(xs)),
            rms,
            q05,
            q50,
            q95,
            zero_cross,
            line_length,
            mobility,
            complexity,
            peak_freq,
            spec_entropy,
        ],
        dtype=np.float32,
    )
    return sanitize_feature_vector(stats)
def extract_algorithmic_annotations_features(algo_data):
    """18-dimensional summary of CAISR algorithmic annotations.

    Layout: 6 event features (index/hour + mean duration for resp, arousal,
    limb), 9 sleep-architecture features, 3 mean stage-probability features.
    """
    if not algo_data:
        return np.zeros(ALGORITHMIC_FEATURE_DIM, dtype=np.float32)

    # NOTE(review): recording length is inferred from the longest trace,
    # assuming one sample per second — confirm the annotation sampling rate.
    longest = max(
        (len(algo_data[key]) for key in ("resp_caisr", "arousal_caisr", "limb_caisr", "stage_caisr") if key in algo_data),
        default=0,
    )
    total_hours = longest / 3600.0 if longest > 0 else 0.0

    feats = []
    for key in ("resp_caisr", "arousal_caisr", "limb_caisr"):
        per_hour, mean_minutes = summarize_event_trace(algo_data.get(key), total_hours)
        feats.extend([per_hour, mean_minutes])

    stages = np.asarray(algo_data.get("stage_caisr", np.array([])), dtype=np.float64)
    # Values >= 9 act as filler/background codes and are excluded.
    usable = stages[np.isfinite(stages) & (stages < 9.0)]
    if usable.size:
        w_pct = np.mean(usable == 5)
        r_pct = np.mean(usable == 4)
        n1_pct = np.mean(usable == 3)
        n2_pct = np.mean(usable == 2)
        n3_pct = np.mean(usable == 1)
        efficiency = np.mean((usable >= 1) & (usable <= 4))
        if total_hours > 0 and usable.size > 1:
            transitions_per_hour = np.count_nonzero(np.diff(usable)) / total_hours
        else:
            transitions_per_hour = 0.0
        rem_positions = np.where(usable == 4)[0]
        # NOTE(review): REM latency assumes 30-second epochs — verify against
        # the actual resolution of the stage trace.
        rem_latency_hours = float(rem_positions[0]) * 30.0 / 3600.0 if rem_positions.size else 0.0
        dist = np.array([w_pct, n1_pct, n2_pct, n3_pct, r_pct], dtype=np.float64)
        nz = dist > 0
        stage_entropy = (
            float(-(dist[nz] * np.log(dist[nz])).sum() / np.log(dist.size)) if nz.any() else 0.0
        )
    else:
        w_pct = r_pct = n1_pct = n2_pct = n3_pct = efficiency = 0.0
        transitions_per_hour = rem_latency_hours = stage_entropy = 0.0

    feats.extend([w_pct, n1_pct, n2_pct, n3_pct, r_pct, efficiency, transitions_per_hour, rem_latency_hours, stage_entropy])
    feats.extend(
        mean_clean_probability(algo_data.get(key))
        for key in ("caisr_prob_w", "caisr_prob_n3", "caisr_prob_arous")
    )

    return sanitize_feature_vector(np.asarray(feats, dtype=np.float32))
Human Sleep Architecture --- - # Standard labels: 0=W, 1=N1, 2=N2, 3=N3, 4=R, 5=Unknown/Movement - stages = human_data.get('stage_expert', np.array([])) - - # Filter out label 5 (often used by experts for movement/unscored) - valid_mask = (stages < 9.0) - valid_stages = stages[valid_mask] - - if len(valid_stages) > 0: - w_pct = np.mean(valid_stages == 5) - r_pct = np.mean(valid_stages == 4) - n1_pct = np.mean(valid_stages == 3) - n2_pct = np.mean(valid_stages == 2) - n3_pct = np.mean(valid_stages == 1) - efficiency = np.mean(valid_stages > 0) + features.extend( + [ + mean_clean_probability(algo_data.get("caisr_prob_w")), + mean_clean_probability(algo_data.get("caisr_prob_n3")), + mean_clean_probability(algo_data.get("caisr_prob_arous")), + ] + ) + + return sanitize_feature_vector(np.asarray(features, dtype=np.float32)) + + +################################################################################ +# +# Model selection and prediction +# +################################################################################ + +def select_and_fit_model(features, labels, groups, verbose): + unique_labels, counts = np.unique(labels, return_counts=True) + if unique_labels.size < 2: + constant_label = int(unique_labels[0]) + model = DummyClassifier(strategy="constant", constant=constant_label) + model.fit(features, labels) + return "constant_dummy", model, None, MODEL_THRESHOLD + + candidate_factories = get_candidate_factories() + candidate_map = {name: factory for name, factory in candidate_factories} + best_name = "extra_trees" + best_score = -np.inf + best_model = None + + splitters = build_cv_splitters(labels, groups) + for splitter in splitters: + splitter_scored = False + for name, factory in candidate_factories: + try: + candidate = factory() + if splitter["kind"] == "group": + scores = evaluate_grouped_auc(candidate, features, labels, groups, splitter["cv"]) + else: + scores = cross_val_score(candidate, features, labels, cv=splitter["cv"], scoring="roc_auc", 
n_jobs=1) + score = float(np.mean(scores)) + splitter_scored = True + if verbose: + print(f" {name}: mean CV AUROC {score:.4f} ({splitter['label']})") + except Exception as exc: + if verbose: + print(f" {name}: CV failed ({exc})") + continue + + if is_better_model(name, score, best_name, best_score): + best_name = name + best_score = score + + if splitter_scored: + break + + if best_score == -np.inf: + best_score = None + + for name, factory in candidate_factories: + if name == best_name: + best_model = factory() + break + + if best_model is None: + best_model = candidate_factories[0][1]() + best_name = candidate_factories[0][0] + + threshold = estimate_optimal_threshold(candidate_map[best_name], features, labels, groups, verbose) + best_model.fit(features, labels) + return best_name, best_model, best_score, threshold + + +def get_candidate_factories(): + def extra_trees_base(): + return ExtraTreesClassifier( + n_estimators=500, + max_depth=12, + min_samples_leaf=3, + min_samples_split=8, + max_features="sqrt", + class_weight="balanced", + random_state=DEFAULT_RANDOM_STATE, + n_jobs=1, + ) + + def random_forest_base(): + return RandomForestClassifier( + n_estimators=500, + max_depth=10, + min_samples_leaf=3, + min_samples_split=8, + max_features="sqrt", + class_weight="balanced_subsample", + random_state=DEFAULT_RANDOM_STATE, + n_jobs=1, + ) + + def hist_gradient_boosting_base(): + return HistGradientBoostingClassifier( + learning_rate=0.04, + max_iter=250, + max_depth=4, + max_leaf_nodes=31, + min_samples_leaf=24, + l2_regularization=0.5, + early_stopping=False, + random_state=DEFAULT_RANDOM_STATE, + ) + + def extra_trees(): + return Pipeline( + [ + ("imputer", SimpleImputer(strategy="median")), + ( + "model", + extra_trees_base(), + ), + ] + ) + + def random_forest(): + return Pipeline( + [ + ("imputer", SimpleImputer(strategy="median")), + ( + "model", + random_forest_base(), + ), + ] + ) + + def hist_gradient_boosting(): + return Pipeline( + [ + ("imputer", 
SimpleImputer(strategy="median")), + ( + "model", + hist_gradient_boosting_base(), + ), + ] + ) + + def logistic_regression(): + return Pipeline( + [ + ("imputer", SimpleImputer(strategy="median")), + ("scaler", StandardScaler()), + ( + "model", + LogisticRegression( + C=0.5, + class_weight="balanced", + max_iter=3000, + solver="lbfgs", + random_state=DEFAULT_RANDOM_STATE, + ), + ), + ] + ) + + def voting_ensemble(): + return Pipeline( + [ + ("imputer", SimpleImputer(strategy="median")), + ( + "model", + VotingClassifier( + estimators=[ + ("et", extra_trees_base()), + ("rf", random_forest_base()), + ("hgb", hist_gradient_boosting_base()), + ( + "lr", + Pipeline( + [ + ("scaler", StandardScaler()), + ( + "model", + LogisticRegression( + C=0.5, + class_weight="balanced", + max_iter=3000, + solver="lbfgs", + random_state=DEFAULT_RANDOM_STATE, + ), + ), + ] + ), + ), + ], + voting="soft", + weights=[3, 2, 3, 1], + n_jobs=1, + ), + ), + ] + ) + + return [ + ("voting_ensemble", voting_ensemble), + ("extra_trees", extra_trees), + ("random_forest", random_forest), + ("hist_gradient_boosting", hist_gradient_boosting), + ("logistic_regression", logistic_regression), + ] + + +def predict_binary_and_probability(model, features, threshold): + features = sanitize_feature_vector(np.asarray(features, dtype=np.float32)) + + probability_output = 0.0 + classes = getattr(model, "classes_", None) + + if hasattr(model, "predict_proba"): + probabilities = np.asarray(model.predict_proba(features), dtype=np.float64) + if probabilities.ndim == 2 and probabilities.shape[0] > 0: + if probabilities.shape[1] == 1: + positive_class = int(classes[0]) if classes is not None and len(classes) == 1 else 0 + probability_output = 1.0 if positive_class == 1 else 0.0 + else: + positive_index = 1 + if classes is not None and 1 in classes: + positive_index = list(classes).index(1) + probability_output = float(probabilities[0, positive_index]) + elif hasattr(model, "decision_function"): + score = 
float(np.ravel(model.decision_function(features))[0]) + probability_output = float(1.0 / (1.0 + np.exp(-score))) else: - w_pct = n1_pct = n2_pct = n3_pct = r_pct = efficiency = 0.0 - - features.extend([w_pct, n1_pct, n2_pct, n3_pct, r_pct, efficiency]) - - # --- 3. Fragmentation & Stability (Replacing Probabilities) --- - # These metrics quantify how "broken" the sleep is, which is a key marker. - if len(valid_stages) > 1: - # Number of stage transitions - transitions = np.count_nonzero(np.diff(valid_stages)) / total_hours - # Wake After Sleep Onset (WASO) proxy: non-zero stages followed by zero - waso_minutes = (np.count_nonzero(valid_stages == 0) * 30) / 60.0 - # REM Latency (epochs until first REM) - rem_indices = np.where(valid_stages == 4)[0] - rem_latency = rem_indices[0] if len(rem_indices) > 0 else 0.0 + prediction = np.asarray(model.predict(features)).reshape(-1) + probability_output = float(prediction[0]) + + probability_output = float(np.clip(probability_output, 0.0, 1.0)) + binary_output = bool(probability_output >= threshold) + return binary_output, probability_output + + +################################################################################ +# +# Metadata and parsing helpers +# +################################################################################ + +def build_metadata_lookup(metadata_df): + lookup = {} + for _, row in metadata_df.iterrows(): + record = row.to_dict() + site_id = normalize_identifier(record.get(HEADERS["site_id"])) + patient_id = normalize_identifier(record.get(HEADERS["bids_folder"])) + session_id = normalize_identifier(record.get(HEADERS["session_id"])) + lookup[make_record_key(site_id, patient_id, session_id)] = record + return lookup + + +def load_metadata_row(demographics_file, site_id, patient_id, session_id): + metadata_df = pd.read_csv(demographics_file) + site_series = metadata_df[HEADERS["site_id"]].map(normalize_identifier) + patient_series = metadata_df[HEADERS["bids_folder"]].map(normalize_identifier) 
+ session_series = metadata_df[HEADERS["session_id"]].map(normalize_identifier) + row = metadata_df[ + (site_series == site_id) + & (patient_series == patient_id) + & (session_series == session_id) + ] + if row.empty: + return {} + return row.iloc[0].to_dict() + + +def make_record_key(site_id, patient_id, session_id): + return (normalize_identifier(site_id), normalize_identifier(patient_id), normalize_identifier(session_id)) + + +def make_group_id(site_id, patient_id): + return f"{normalize_identifier(site_id)}::{normalize_identifier(patient_id)}" + + +def is_better_model(candidate_name, candidate_score, best_name, best_score): + if candidate_score > best_score + MODEL_SELECTION_TOLERANCE: + return True + if abs(candidate_score - best_score) <= MODEL_SELECTION_TOLERANCE: + return get_model_priority(candidate_name) < get_model_priority(best_name) + return False + + +def get_model_priority(model_name): + priorities = { + "logistic_regression": 0, + "hist_gradient_boosting": 1, + "voting_ensemble": 2, + "random_forest": 3, + "extra_trees": 4, + "constant_dummy": 5, + } + return priorities.get(model_name, 99) + + +def build_cv_splitters(labels, groups): + labels = np.asarray(labels) + groups = np.asarray(groups, dtype=object) + + splitters = [] + unique_groups = np.unique(groups) + cv_splits = int(min(MAX_CV_SPLITS, np.unique(labels, return_counts=True)[1].min())) + + if unique_groups.size >= 2 and unique_groups.size < labels.size: + group_splits = int(min(MAX_CV_SPLITS, unique_groups.size)) + if group_splits >= 2: + splitters.append({ + "kind": "group", + "cv": GroupKFold(n_splits=group_splits), + "label": f"grouped {group_splits}-fold CV", + }) + + if cv_splits >= 2: + splitters.append({ + "kind": "stratified", + "cv": StratifiedKFold(n_splits=cv_splits, shuffle=True, random_state=DEFAULT_RANDOM_STATE), + "label": f"stratified {cv_splits}-fold CV", + }) + + return splitters + + +def evaluate_grouped_auc(candidate, features, labels, groups, splitter): + scores = [] + 
for train_idx, test_idx in splitter.split(features, labels, groups): + train_labels = labels[train_idx] + test_labels = labels[test_idx] + if np.unique(train_labels).size < 2 or np.unique(test_labels).size < 2: + continue + + estimator = clone(candidate) + estimator.fit(features[train_idx], train_labels) + + if hasattr(estimator, "predict_proba"): + probabilities = np.asarray(estimator.predict_proba(features[test_idx]), dtype=np.float64) + if probabilities.ndim != 2 or probabilities.shape[1] < 2: + continue + classes = getattr(estimator, "classes_", None) + positive_index = 1 + if classes is not None and 1 in classes: + positive_index = list(classes).index(1) + fold_outputs = probabilities[:, positive_index] + elif hasattr(estimator, "decision_function"): + fold_outputs = np.ravel(estimator.decision_function(features[test_idx])) + else: + fold_outputs = np.ravel(estimator.predict(features[test_idx])) + + scores.append(float(roc_auc_score(test_labels, fold_outputs))) + + if not scores: + raise ValueError("Grouped CV produced no valid folds with both classes present.") + + return np.asarray(scores, dtype=np.float64) + + +def estimate_optimal_threshold(candidate_factory, features, labels, groups, verbose): + for splitter in build_cv_splitters(labels, groups): + try: + candidate = candidate_factory() + probabilities = generate_oof_probabilities(candidate, features, labels, groups, splitter) + threshold = choose_decision_threshold(labels, probabilities) + if verbose: + print(f" threshold tuned at {threshold:.3f} using {splitter['label']}") + return threshold + except Exception as exc: + if verbose: + print(f" threshold tuning failed ({splitter['label']}: {exc})") + + return MODEL_THRESHOLD + + +def generate_oof_probabilities(candidate, features, labels, groups, splitter): + probabilities = np.full(len(labels), np.nan, dtype=np.float64) + + if splitter["kind"] == "group": + fold_iterator = splitter["cv"].split(features, labels, groups) else: - transitions = waso_minutes 
= rem_latency = 0.0 + fold_iterator = splitter["cv"].split(features, labels) + + for train_idx, test_idx in fold_iterator: + train_labels = labels[train_idx] + if np.unique(train_labels).size < 2: + raise ValueError("a training fold has only one class") + + estimator = clone(candidate) + estimator.fit(features[train_idx], train_labels) + probabilities[test_idx] = predict_probability_scores(estimator, features[test_idx]) + + if np.any(~np.isfinite(probabilities)): + raise ValueError("OOF probability generation did not cover every sample") + + return probabilities + + +def choose_decision_threshold(labels, probabilities): + labels = np.asarray(labels, dtype=np.int8) + probabilities = np.asarray(probabilities, dtype=np.float64) + + quantile_grid = np.quantile(probabilities, np.linspace(0.1, 0.9, 17)) + threshold_grid = np.unique( + np.clip( + np.concatenate(([0.30, 0.35, 0.40, 0.45, 0.50, 0.55, 0.60, 0.65, 0.70], quantile_grid)), + 0.05, + 0.95, + ) + ) + + best_threshold = MODEL_THRESHOLD + best_score = -np.inf + + for threshold in threshold_grid: + predictions = (probabilities >= threshold).astype(np.int8) + score = float(balanced_accuracy_score(labels, predictions)) + if score > best_score + 1e-12: + best_threshold = float(threshold) + best_score = score + elif abs(score - best_score) <= 1e-12 and abs(float(threshold) - 0.5) < abs(best_threshold - 0.5): + best_threshold = float(threshold) + + return best_threshold + + +def predict_probability_scores(model, features): + features = sanitize_feature_vector(np.asarray(features, dtype=np.float32)) + + if hasattr(model, "predict_proba"): + probabilities = np.asarray(model.predict_proba(features), dtype=np.float64) + if probabilities.ndim == 2 and probabilities.shape[0] > 0: + if probabilities.shape[1] == 1: + classes = getattr(model, "classes_", None) + positive_class = int(classes[0]) if classes is not None and len(classes) == 1 else 0 + score = 1.0 if positive_class == 1 else 0.0 + return np.full(features.shape[0], 
score, dtype=np.float64) + + classes = getattr(model, "classes_", None) + positive_index = 1 + if classes is not None and 1 in classes: + positive_index = list(classes).index(1) + return np.clip(probabilities[:, positive_index], 0.0, 1.0) + + if hasattr(model, "decision_function"): + scores = np.ravel(model.decision_function(features)) + return 1.0 / (1.0 + np.exp(-scores)) + + predictions = np.ravel(model.predict(features)).astype(np.float64) + return np.clip(predictions, 0.0, 1.0) + + + +def normalize_identifier(value): + if value is None: + return "" + if isinstance(value, str): + return value.strip() + if isinstance(value, (int, np.integer)): + return str(int(value)) + if isinstance(value, (float, np.floating)) and np.isfinite(value): + return str(int(value)) if float(value).is_integer() else str(value) + return str(value).strip() + + +def safe_float(value): + try: + if value is None or (isinstance(value, str) and value.strip() == ""): + return np.nan + result = float(value) + return result if np.isfinite(result) else np.nan + except (TypeError, ValueError): + return np.nan + + +def extract_label(metadata): + value = metadata.get(HEADERS["label"]) + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"true", "1", "1.0", "t", "y", "yes"}: + return 1 + if normalized in {"false", "0", "0.0", "f", "n", "no"}: + return 0 + return None + if isinstance(value, (bool, np.bool_)): + return int(value) + if isinstance(value, (int, np.integer)): + return int(value) if int(value) in (0, 1) else None + if isinstance(value, (float, np.floating)) and np.isfinite(value): + value = int(value) + return value if value in (0, 1) else None + return None + + +def standardize_sex(value): + normalized = str(value).strip().lower() + if normalized.startswith("f"): + return "female" + if normalized.startswith("m"): + return "male" + return "unknown" + + +def standardize_race(value): + normalized = str(value).strip().lower() + if any(token in normalized for 
token in ("white", "caucasian")): + return "white" + if any(token in normalized for token in ("black", "african american")): + return "black" + if "asian" in normalized: + return "asian" + if normalized in {"", "unknown", "unavailable", "declined", "unreported", "nan", "none", "not specified"}: + return "unavailable" + return "other" + + +def standardize_ethnicity(value): + normalized = str(value).strip().lower() + if any(token in normalized for token in ("not hispanic", "non-hispanic", "non hispanic", "not latino", "non-latino")): + return "not_hispanic" + if "hispanic" in normalized or "latino" in normalized: + return "hispanic" + if normalized in {"", "unknown", "unavailable", "declined", "unreported", "nan", "none", "not specified"}: + return "unavailable" + return "unavailable" - features.extend([transitions, waso_minutes, rem_latency]) - return np.array(features) +################################################################################ +# +# Annotation and signal utilities +# +################################################################################ + +def summarize_event_trace(trace, total_hours): + if trace is None or total_hours <= 0: + return 0.0, 0.0 + + signal = np.asarray(trace, dtype=np.float64).reshape(-1) + signal = np.where(np.isfinite(signal), signal, 0.0) + binary = (signal > 0).astype(np.int8) + + if binary.size == 0: + return 0.0, 0.0 + + starts = np.flatnonzero(np.diff(np.pad(binary, (1, 0))) == 1) + ends = np.flatnonzero(np.diff(np.pad(binary, (0, 1))) == -1) + durations = (ends - starts).astype(np.float64) + + index_per_hour = float(len(starts) / total_hours) + mean_duration_minutes = float(np.mean(durations) / 60.0) if durations.size else 0.0 + return index_per_hour, mean_duration_minutes -# Save your trained model. 
def mean_clean_probability(values):
    """Mean of a probability trace after discarding non-finite and
    out-of-range entries; 0.0 when nothing valid remains."""
    if values is None:
        return 0.0
    flat = np.asarray(values, dtype=np.float64).reshape(-1)
    kept = flat[np.isfinite(flat) & (flat >= 0.0) & (flat <= 1.0)]
    return float(kept.mean()) if kept.size else 0.0


def sanitize_feature_vector(values):
    """Cast to float32 and zero out NaN/±inf so estimators never see
    invalid feature values."""
    as_float32 = np.asarray(values, dtype=np.float32)
    return np.nan_to_num(as_float32, nan=0.0, posinf=0.0, neginf=0.0)


################################################################################
#
# Persistence
#
################################################################################

def save_model(model_folder, model_artifact):
    """Serialize the trained model artifact to <model_folder>/model.sav."""
    destination = os.path.join(model_folder, "model.sav")
    joblib.dump(model_artifact, destination, protocol=0)