update for prediction logic how ckpts with class labels

aditya0by0 · aditya0by0 · commit a1cdacaffba6 · 2026-01-08T20:05:54.000+01:00
diff --git a/README.md b/README.md
@@ -81,10 +81,10 @@ python3  chebai/result/prediction.py predict_from_file --checkpoint_path=[path-t
 
 * **`--smiles_file_path`**: Path to a text file containing one SMILES string per line.
 
-* **`--save_to`** *(optional)*: Predictions will be saved to the path as CSV file. The CSV will contain one row per SMILES string and one column per predicted class.
-
-* **`--classes_path`** *(optional)*: Path to the dataset’s `raw/classes.txt` file, which maps model output indices to ChEBI IDs.
+* **`--save_to`** *(optional)*: Path of the CSV file to which the predictions are saved. The CSV will contain one row per SMILES string and one column per predicted class. Defaults to `predictions.csv` in the current working directory.
 
+* **`--classes_path`** *(optional)*: Path to the dataset’s `classes.txt` file, which maps model output indices to ChEBI IDs.
+  * Checkpoints created after PR #135 store the classification labels themselves, so this parameter is not required for them (the stored labels take precedence).
   * If provided, the CSV columns will be named using the ChEBI IDs.
   * If omitted, the script will try to locate the file automatically. If it cannot be found, the columns will be numbered sequentially.
 
diff --git a/chebai/result/prediction.py b/chebai/result/prediction.py
@@ -62,6 +62,19 @@ def __init__(
         )
         print("*" * 10, f"Loaded model class: {self._model.__class__.__name__}")
 
+        self._classification_labels: list | None = ckpt_file.get(
+            "classification_labels", None
+        )
+        if self._classification_labels is not None:
+            print(f"Loaded {len(self._classification_labels)} classification labels.")
+            assert len(self._classification_labels) > 0, (
+                "Classification labels list is empty."
+            )
+            assert len(self._classification_labels) == self._model.out_dim, (
+                f"Number of class labels ({len(self._classification_labels)}) does not match "
+                f"the model output dimension ({self._model.out_dim})."
+            )
+
         if compile_model:
             self._model = torch.compile(self._model)
         self._model.eval()
@@ -92,7 +105,10 @@ def _add_class_columns(class_file_path: _PATH) -> list[str]:
             with open(class_file_path, "r") as f:
                 return [cls.strip() for cls in f.readlines()]
 
-        if classes_path is not None:
+        if self._classification_labels is not None:
+            CLASS_LABELS = self._classification_labels
+        # --- For old checkpoints that do not have classification_labels saved ---
+        elif classes_path is not None:
             CLASS_LABELS = _add_class_columns(classes_path)
         elif os.path.exists(self._dm.classes_txt_file_path):
             CLASS_LABELS = _add_class_columns(self._dm.classes_txt_file_path)
@@ -102,6 +118,7 @@ def _add_class_columns(class_file_path: _PATH) -> list[str]:
             print("No valid predictions were made. (All predictions are None.)")
             return
 
+        # --- Logic for old checkpoints that do not have classification_labels saved ---
         if CLASS_LABELS is not None and self._model.out_dim is not None:
             assert len(CLASS_LABELS) > 0, "Class labels list is empty."
             assert len(CLASS_LABELS) == self._model.out_dim, (