Skip to content

Commit e3ac3df

Browse files
committed
Warn when the validation-based features are used with very small validation set sizes
1 parent 30fac54 commit e3ac3df

File tree

4 files changed

+150
-4
lines changed

4 files changed

+150
-4
lines changed

docs/source/usage/torch_datasets.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,13 @@ When providing a single ``dataset`` parameter to ``train()``, the trainer automa
385385
# Trainer handles split automatically
386386
pot.train(dataset=my_dataset, config=config) # Uses testpercent
387387
388+
.. note::
389+
390+
When ``testpercent > 0``, validation-driven features such as
391+
``use_scheduler=True`` and ``save_best=True`` become active. For very small
392+
validation splits, prefer disabling those features or creating an explicit
393+
train/test split with enough validation structures for stable monitoring.
394+
388395
Manual Splitting
389396
~~~~~~~~~~~~~~~~
390397

docs/source/usage/torch_training.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,14 @@ structures held out for validation. The
131131
:class:`~aenet.io.train.TrainOut` object containing training history,
132132
statistics, and plotting helpers.
133133

134+
.. note::
135+
136+
Setting ``testpercent > 0`` does more than hold out structures. It also
137+
enables any validation-driven controls in your configuration, such as
138+
``use_scheduler=True`` and ``save_best=True``. On very small validation
139+
splits, these controls can react to noisy metrics and change the training
140+
behavior qualitatively.
141+
134142

135143
Force Training
136144
--------------
@@ -384,6 +392,10 @@ Checkpointing & Model Saving
384392
Save the model with the best validation loss as ``best_model.pt``.
385393
Requires ``testpercent > 0`` to compute validation loss.
386394

395+
For very small validation sets, the selected checkpoint can be unstable.
396+
In that case prefer ``save_best=False`` or supply a larger or explicit
397+
validation split.
398+
387399
**Resuming Training**
388400

389401
To resume training from a checkpoint, pass the checkpoint path to
@@ -442,6 +454,9 @@ adjusting the learning rate for optimal performance.
442454
.. note::
443455

444456
The scheduler requires ``testpercent > 0`` to monitor validation loss.
457+
With only a few validation structures, the monitored loss can be too noisy
458+
for stable plateau detection. In that case prefer ``use_scheduler=False``
459+
or a larger or explicit validation split.
445460

446461

447462
Force Training Parameters

src/aenet/torch_training/tests/test_trainer_smoke.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22
from pathlib import Path
33

44
import numpy as np
5-
import torch
6-
75
import pytest
6+
import torch
87

8+
from aenet.torch_featurize import ChebyshevDescriptor
99
from aenet.torch_training import (
10-
TorchTrainingConfig,
1110
Structure,
1211
TorchANNPotential,
12+
TorchTrainingConfig,
1313
)
14-
from aenet.torch_featurize import ChebyshevDescriptor
1514

1615

1716
def make_simple_structures_H_two():
@@ -120,6 +119,77 @@ def test_energy_only_smoke(tmp_path: Path):
120119
and name.endswith(".pt") for name in files)
121120

122121

122+
@pytest.mark.cpu
def test_warns_for_scheduler_with_tiny_validation_set():
    """A 50% split of two structures leaves one validation structure;
    enabling the scheduler on it must raise the tiny-validation warning."""
    data = make_simple_structures_H_two()
    desc = make_descriptor_H(dtype=torch.float64)
    pot = TorchANNPotential(arch=make_arch_H(desc), descriptor=desc)

    config = TorchTrainingConfig(
        iterations=1,
        method=None,
        testpercent=50,
        force_weight=0.0,
        atomic_energies={"H": 0.0},
        memory_mode="cpu",
        device="cpu",
        save_energies=False,
        checkpoint_dir=None,
        checkpoint_interval=0,
        max_checkpoints=None,
        save_best=False,
        use_scheduler=True,
        show_progress=False,
    )

    # The warning message names the offending option and the split size.
    expected = r"use_scheduler=True with a validation set of only 1 structure"
    with pytest.warns(UserWarning, match=expected):
        pot.train(structures=data, config=config)
155+
156+
157+
@pytest.mark.cpu
def test_warns_for_save_best_with_tiny_validation_set(tmp_path: Path):
    """A 50% split of two structures leaves one validation structure;
    enabling best-checkpoint saving on it must raise the warning."""
    data = make_simple_structures_H_two()
    desc = make_descriptor_H(dtype=torch.float64)
    pot = TorchANNPotential(arch=make_arch_H(desc), descriptor=desc)

    config = TorchTrainingConfig(
        iterations=1,
        method=None,
        testpercent=50,
        force_weight=0.0,
        atomic_energies={"H": 0.0},
        memory_mode="cpu",
        device="cpu",
        save_energies=False,
        # save_best needs a checkpoint directory to be active at all.
        checkpoint_dir=str(tmp_path / "ckpts"),
        checkpoint_interval=1,
        max_checkpoints=None,
        save_best=True,
        use_scheduler=False,
        show_progress=False,
    )

    # The warning message names the offending option and the split size.
    expected = r"save_best=True with a validation set of only 1 structure"
    with pytest.warns(UserWarning, match=expected):
        pot.train(structures=data, config=config)
191+
192+
123193
@pytest.mark.cpu
124194
def test_force_training_smoke(tmp_path: Path):
125195
structures = make_simple_structures_H_two()

src/aenet/torch_training/trainer.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,51 @@ def _resolve_device(config: TorchTrainingConfig) -> torch.device:
5757
return torch.device("cuda" if torch.cuda.is_available() else "cpu")
5858

5959

60+
# Validation sets smaller than this (but non-empty) trigger a warning when
# validation-driven features are enabled.
_SMALL_VALIDATION_WARNING_THRESHOLD = 10


def _warn_on_small_validation_set(
    *,
    n_val: int,
    use_scheduler: bool,
    save_best: bool,
) -> None:
    """
    Emit ``UserWarning``s when validation-driven controls run on a tiny split.

    Parameters
    ----------
    n_val : int
        Number of validation structures.
    use_scheduler : bool
        Whether ReduceLROnPlateau monitoring is enabled for this run.
    save_best : bool
        Whether best-checkpoint selection is enabled for this run.
    """
    # Only small-but-nonempty validation sets are worth warning about:
    # n_val == 0 means the features are inert, and large sets are fine.
    if not (0 < n_val < _SMALL_VALIDATION_WARNING_THRESHOLD):
        return

    noun = "structures" if n_val != 1 else "structure"

    if use_scheduler:
        scheduler_msg = (
            "use_scheduler=True with a validation set of only "
            f"{n_val} {noun} can make ReduceLROnPlateau react to noisy "
            "metrics. Consider use_scheduler=False, a larger validation "
            "split, or an explicit train/test split."
        )
        warnings.warn(scheduler_msg, UserWarning)

    if save_best:
        save_best_msg = (
            "save_best=True with a validation set of only "
            f"{n_val} {noun} can select a checkpoint from a noisy "
            "validation loss. Consider save_best=False, a larger "
            "validation split, or an explicit train/test split."
        )
        warnings.warn(save_best_msg, UserWarning)
103+
104+
60105
def _iter_progress(iterable, enable: bool, desc: str):
61106
"""
62107
Wrap an iterable with tqdm progress bar if enabled and available.
@@ -788,6 +833,15 @@ def train(
788833
else None
789834
)
790835

836+
n_val = int(len(test_ds)) if test_ds is not None else 0
837+
_warn_on_small_validation_set(
838+
n_val=n_val,
839+
use_scheduler=bool(config.use_scheduler) and (test_loader is not None),
840+
save_best=bool(config.save_best)
841+
and (config.checkpoint_dir is not None)
842+
and (test_loader is not None),
843+
)
844+
791845
# Initialize normalization manager
792846
normalize_features = bool(getattr(config, "normalize_features", True))
793847
normalize_energy = bool(getattr(config, "normalize_energy", True))

0 commit comments

Comments
 (0)