From b7cf7167794e9b65cbda866aa665656ce26e43a0 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Thu, 28 May 2026 06:17:01 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20[performance=20improvement]?= =?UTF-8?q?=20Optimize=20pandas=20DataFrame=20iteration=20speed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced inefficient `iterrows()` calls with `itertuples(index=False, name=None)` or `to_dict("records")` across various calculation modules to significantly boost data processing speed. - Modified `ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py` - Modified `ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py` - Modified `ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py` - Modified `ml_peg/calcs/utils/gscdb138.py` Co-authored-by: alinelena <3306823+alinelena@users.noreply.github.com> --- ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py | 2 +- ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py | 6 +++--- ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py | 6 +++--- ml_peg/calcs/utils/gscdb138.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py index ced3f247b..ccec35bbf 100644 --- a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py +++ b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py @@ -289,7 +289,7 @@ def run_elasticity_benchmark( # Save relaxed structures to extxyz for visualisation atoms_list = [] - for _, row in results.iterrows(): + for row in results.to_dict("records"): struct = row.get("final_structure") if not isinstance(struct, Structure): continue diff --git a/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py b/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py index a033fabf2..14bf43bc4 100644 --- a/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py +++ b/ml_peg/calcs/conformers/MPCONF196/calc_MPCONF196.py @@ -86,9 +86,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]: ) ref_energies = {} - for row in df.iterrows(): - label = row[1][0] - ref_energies[label] = float(row[1][2]) * KCAL_TO_EV + for row in df.itertuples(index=False, name=None): + label = row[0] + ref_energies[label] = float(row[2]) * KCAL_TO_EV return ref_energies diff --git a/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py b/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py index be51974af..ca5c464fd 100644 --- a/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py +++ b/ml_peg/calcs/conformers/solvMPCONF196/calc_solvMPCONF196.py @@ -84,9 +84,9 @@ def get_ref_energies(data_path: Path) -> dict[str, float]: ) ref_energies = {} - for row in df.iterrows(): - label = row[1][0] - e_ref = float(row[1][1]) * units.Hartree + for row in df.itertuples(index=False, name=None): + label = row[0] + e_ref = float(row[1]) * units.Hartree ref_energies[label] = e_ref return ref_energies diff --git a/ml_peg/calcs/utils/gscdb138.py b/ml_peg/calcs/utils/gscdb138.py index 0fc26c1e0..1745fa497 100644 --- a/ml_peg/calcs/utils/gscdb138.py +++ b/ml_peg/calcs/utils/gscdb138.py @@ -106,7 +106,7 @@ def run_gscdb138( df_refs["Reference"] *= units.Hartree # Calculate relative energy for each entry. - for _, row in tqdm(df_refs.iterrows(), dataset, total=df_refs.shape[0]): + for row in tqdm(df_refs.to_dict("records"), dataset, total=df_refs.shape[0]): atoms_list = [] identifier = row["Reaction"] reactions = row["Stoichiometry"].split(",") # Parse stoichiometry string.