From b7cf7167794e9b65cbda866aa665656ce26e43a0 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Thu, 28 May 2026 06:17:01 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20[performance=20improvement]?=
 =?UTF-8?q?=20Optimize=20pandas=20DataFrame=20iteration=20speed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

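Replaced row-by-row `iterrows()` loops with cheaper pandas iteration to speed up data processing in the calculation modules listed below. Loops that read rows by column name (`calc_elasticity.py`, `gscdb138.py`) now iterate over `to_dict("records")`; loops that read rows by position (`get_ref_energies` in `calc_MPCONF196.py` and `calc_solvMPCONF196.py`) now iterate over `itertuples(index=False, name=None)`.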

- Modified `ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py`
- Modified `ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py`
- Modified `ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py`
- Modified `ml_peg/calcs/utils/gscdb138.py`

Co-authored-by: alinelena <3306823+alinelena@users.noreply.github.com>
---
 ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py     | 2 +-
 ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py         | 6 +++---
 ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py | 6 +++---
 ml_peg/calcs/utils/gscdb138.py                              | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py
index ced3f247b..ccec35bbf 100644
--- a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py
+++ b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py
@@ -289,7 +289,7 @@ def run_elasticity_benchmark(
 
     # Save relaxed structures to extxyz for visualisation
     atoms_list = []
-    for _, row in results.iterrows():
+    for row in results.to_dict("records"):
         struct = row.get("final_structure")
         if not isinstance(struct, Structure):
             continue
diff --git a/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py b/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py
index a033fabf2..14bf43bc4 100644
--- a/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py
+++ b/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py
@@ -86,9 +86,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]:
     )
     ref_energies = {}
 
-    for row in df.iterrows():
-        label = row[1][0]
-        ref_energies[label] = float(row[1][2]) * KCAL_TO_EV
+    for row in df.itertuples(index=False, name=None):
+        label = row[0]
+        ref_energies[label] = float(row[2]) * KCAL_TO_EV
 
     return ref_energies
 
diff --git a/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py b/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py
index be51974af..ca5c464fd 100644
--- a/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py
+++ b/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py
@@ -84,9 +84,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]:
     )
     ref_energies = {}
 
-    for row in df.iterrows():
-        label = row[1][0]
-        e_ref = float(row[1][1]) * units.Hartree
+    for row in df.itertuples(index=False, name=None):
+        label = row[0]
+        e_ref = float(row[1]) * units.Hartree
         ref_energies[label] = e_ref
 
     return ref_energies
diff --git a/ml_peg/calcs/utils/gscdb138.py b/ml_peg/calcs/utils/gscdb138.py
index 0fc26c1e0..1745fa497 100644
--- a/ml_peg/calcs/utils/gscdb138.py
+++ b/ml_peg/calcs/utils/gscdb138.py
@@ -106,7 +106,7 @@ def run_gscdb138(
         df_refs["Reference"] *= units.Hartree
 
         # Calculate relative energy for each entry.
-        for _, row in tqdm(df_refs.iterrows(), dataset, total=df_refs.shape[0]):
+        for row in tqdm(df_refs.to_dict("records"), dataset, total=df_refs.shape[0]):
             atoms_list = []
             identifier = row["Reaction"]
             reactions = row["Stoichiometry"].split(",")  # Parse stoichiometry string.