ddmms · alinelena · May 14, 2026 · May 14, 2026
diff --git a/.jules/bolt.md b/.jules/bolt.md
@@ -5,3 +5,15 @@
 ## 2024-05-19 - Caching YAML Load for Framework Registry
 **Learning:** `yaml.safe_load` on `frameworks.yml` within `load_framework_registry()` was taking ~2-3 ms per call and it was repeatedly called for every framework entry via `get_framework_config()`. This was a micro-bottleneck, especially when dealing with lists or multiple frameworks.
 **Action:** Applied the `@lru_cache` and `deepcopy` pattern successfully again to `load_framework_registry()` and `get_framework_config()` to avoid caching a mutable dictionary directly and avoid repeated YAML I/O parsing.
+
+## 2024-05-19 - Pandas Iteration Bottlenecks
+**Learning:** `iterrows()` is consistently used across the codebase (e.g., `calc_solvMPCONF196.py`, `gscdb138.py`) for iterating through DataFrames and is a major, known performance bottleneck (often 10-20x slower than alternatives).
+**Action:** Replace `iterrows()` with `itertuples(index=False, name=None)` when simple tuple indexing is sufficient, standard `itertuples()` for dot-notation access (only safe when every column name is a valid Python identifier; otherwise pandas renames the field to a positional name such as `_1`), or `to_dict('records')` when dictionary access patterns like `.get()` are required by downstream logic.
+
+## 2024-05-19 - Caching UI Layout Generation
+**Learning:** The `build_faqs()` component function was reading `faqs.yml` from disk synchronously on every render without caching, similar to previous issues discovered with `frameworks.yml`.
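+**Action:** Apply `@functools.cache` to UI component generation functions that depend on static configuration files to eliminate repetitive disk I/O and parsing overhead. Note that the cached component is the same mutable object for every caller and lives for the lifetime of the process, so callers must not mutate it, and edits to the underlying file only take effect after a restart (or an explicit `cache_clear()`).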
+
+## 2024-05-19 - Hanging Tests in Restricted Network
+**Learning:** Running pytest on heavy ML integration tests (like those in `calc_solvMPCONF196.py` and `calc_high_pressure_relaxation.py`) in environments with restricted network access causes the test suite to hang or time out as the system silently attempts to download gigabytes of model weights (e.g., for Torch-based models such as MACE) in the background.
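+**Action:** When tests hang in this manner due to missing heavy dependencies that cannot be easily installed, fall back to static analysis (`ruff check`) and `python -m py_compile` to confirm that the refactored code is syntactically valid and lint-clean without triggering remote downloads. These checks do not exercise runtime behaviour, so the logic of the change must still be reviewed by hand or verified later in an environment where the tests can run.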
diff --git a/ml_peg/app/utils/build_components.py b/ml_peg/app/utils/build_components.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+from functools import cache
 from importlib import metadata
 from pathlib import Path
 import time
@@ -468,6 +469,7 @@ def build_plot_download_controls(graph_id: str) -> Div:
     )
 
 
+@cache
 def build_faqs() -> Div:
     """
     Build FAQ section with collapsible dropdowns from YAML file.

diff --git a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py
@@ -235,7 +235,7 @@ def run_elasticity_benchmark(
 
     # Save relaxed structures to extxyz for visualisation
     atoms_list = []
-    for _, row in results.iterrows():
+    for row in results.to_dict("records"):
         struct = row.get("final_structure")
         if struct is not None:
             atoms = AseAtomsAdaptor.get_atoms(struct).copy()

diff --git a/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py b/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py
@@ -85,9 +85,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]:
     )
     ref_energies = {}
 
-    for row in df.iterrows():
-        label = row[1][0]
-        ref_energies[label] = float(row[1][2]) * KCAL_TO_EV
+    for row in df.itertuples(index=False, name=None):
+        label = row[0]
+        ref_energies[label] = float(row[2]) * KCAL_TO_EV
 
     return ref_energies
 

diff --git a/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py b/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py
@@ -83,9 +83,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]:
     )
     ref_energies = {}
 
-    for row in df.iterrows():
-        label = row[1][0]
-        e_ref = float(row[1][1]) * units.Hartree
+    for row in df.itertuples(index=False, name=None):
+        label = row[0]
+        e_ref = float(row[1]) * units.Hartree
         ref_energies[label] = e_ref
 
     return ref_energies

diff --git a/ml_peg/calcs/utils/gscdb138.py b/ml_peg/calcs/utils/gscdb138.py
@@ -105,11 +105,11 @@ def run_gscdb138(
         df_refs["Reference"] *= units.Hartree
 
         # Calculate relative energy for each entry.
-        for _, row in tqdm(df_refs.iterrows(), dataset, total=df_refs.shape[0]):
+        for row in tqdm(df_refs.itertuples(), dataset, total=df_refs.shape[0]):
             atoms_list = []
-            identifier = row["Reaction"]
-            reactions = row["Stoichiometry"].split(",")  # Parse stoichiometry string.
-            e_rel_ref = row["Reference"]
+            identifier = row.Reaction
+            reactions = row.Stoichiometry.split(",")  # Parse stoichiometry string.
+            e_rel_ref = row.Reference
             num_species = len(reactions) // 2  # Each species has coefficient and name.
 
             e_rel_model = 0