From 5e3666114c66a29e95b710b34f9b59d77fec0133 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Mon, 19 Jan 2026 16:46:23 +0000 Subject: [PATCH 01/12] Add RDB7 Calculation files --- .../molecular_reactions/rdb7/.dvc/.gitignore | 3 + .../molecular_reactions/rdb7/.dvc/config | 0 .../calcs/molecular_reactions/rdb7/.dvcignore | 3 + .../molecular_reactions/rdb7/calc_rdb7.py | 167 ++++++++++++++++++ 4 files changed, 173 insertions(+) create mode 100644 ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore create mode 100644 ml_peg/calcs/molecular_reactions/rdb7/.dvc/config create mode 100644 ml_peg/calcs/molecular_reactions/rdb7/.dvcignore create mode 100644 ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore b/ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore new file mode 100644 index 000000000..528f30c71 --- /dev/null +++ b/ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvc/config b/ml_peg/calcs/molecular_reactions/rdb7/.dvc/config new file mode 100644 index 000000000..e69de29bb diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvcignore b/ml_peg/calcs/molecular_reactions/rdb7/.dvcignore new file mode 100644 index 000000000..519730552 --- /dev/null +++ b/ml_peg/calcs/molecular_reactions/rdb7/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py b/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py new file mode 100644 index 000000000..42fcbd390 --- /dev/null +++ b/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py @@ -0,0 +1,167 @@ +""" +Calculate the RDB7 reaction barrier dataset. + +Spiekermann, K., Pattanaik, L. & Green, W.H. +High accuracy barrier heights, enthalpies, +and rate coefficients for chemical reactions. +Sci Data 9, 417 (2022) +https://doi.org/10.1038/s41597-022-01529-6 +""" + +from __future__ import annotations + +from pathlib import Path + +from ase import Atom, Atoms, units +from ase.io import write +import mlipx +from mlipx.abc import NodeWithCalculator +import numpy as np +from tqdm import tqdm +import zntrack + +from ml_peg.calcs.utils.utils import chdir, download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +KCAL_TO_EV = units.kcal / units.mol +EV_TO_KCAL = 1 / KCAL_TO_EV + +OUT_PATH = Path(__file__).parent / "outputs" + + +class RDB7Benchmark(zntrack.Node): + """Benchmark RDB7 reaction barriers from 10.1038/s41597-022-01529-6.""" + + model: NodeWithCalculator = zntrack.deps() + model_name: str = zntrack.params() + + @staticmethod + def get_cc_energy(fname): + """ + Read reference energy. + + Parameters + ---------- + fname + Name of the calculation output file. + + Returns + ------- + float + CCSD(T)-F12/cc-pVDZ-F12 energy. + """ + with open(fname) as lines: + for line in lines: + if "CCSD(T)-F12/cc-pVDZ-F12 energy" in line: + energy = float(line.strip().split()[-1]) * units.Hartree + break + return energy + + @staticmethod + def get_atoms_from_molpro(fname): + """ + Get ASE atoms from the molpro file. + + Parameters + ---------- + fname + Name of the calculation output file. + + Returns + ------- + ASE.Atoms + ASE atoms object of the structure. + """ + atoms = Atoms(None) + with open(fname) as lines: + read_started = False + for i, line in enumerate(lines): + if "ATOMIC COORDINATES" in line: + read_started = True + xyz_start = i + 4 + if read_started: + if i >= xyz_start: + items = line.strip().split() + if len(items) == 0: + break + position = ( + np.array( + [float(items[3]), float(items[4]), float(items[5])] + ) + * units.Bohr + ) + atoms += Atom(symbol=items[1], position=position) + atoms.info["charge"] = 0 + atoms.info["spin"] = 1 + return atoms + + def run(self): + """Run new benchmark.""" + data_path = ( + download_s3_data( + filename="RDB7.zip", + key="inputs/molecular_reactions/RDB7/RDB7.zip", + ) + / "RDB7" + ) + + # Read in data and attach calculator + + calc = self.model.get_calculator() + + for i in tqdm(range(0, 11961)): + bh_forward_ref = 0 + bh_forward_model = 0 + label = str(i).zfill(6) + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("r*"): + bh_forward_ref -= self.get_cc_energy(qm_path) + atoms = self.get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model -= atoms.get_potential_energy() + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("ts*"): + bh_forward_model += self.get_cc_energy(qm_path) + atoms = self.get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model += atoms.get_potential_energy() + + atoms.info["model_forward_barrier"] = bh_forward_model + atoms.info["ref_forward_barrier"] = bh_forward_ref + + write_dir = OUT_PATH / self.model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}_ts.xyz", atoms) + + +def build_project(repro: bool = False) -> None: + """ + Build mlipx project. + + Parameters + ---------- + repro + Whether to call dvc repro -f after building. + """ + project = mlipx.Project() + benchmark_node_dict = {} + + for model_name, model in MODELS.items(): + with project.group(model_name): + benchmark = RDB7Benchmark( + model=model, + model_name=model_name, + ) + benchmark_node_dict[model_name] = benchmark + + if repro: + with chdir(Path(__file__).parent): + project.repro(build=True, force=True) + else: + project.build() + + +def test_rdb7_barrier_heights(): + """Run RDB7 benchmark via pytest.""" + build_project(repro=True) From 7f0a3624316077f2702b57682f0205681112548f Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Mon, 19 Jan 2026 16:52:54 +0000 Subject: [PATCH 02/12] Add D3 calc --- ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py b/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py index 42fcbd390..9e08a6111 100644 --- a/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py +++ b/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py @@ -111,6 +111,8 @@ def run(self): # Read in data and attach calculator calc = self.model.get_calculator() + # Add D3 calculator for this test + calc = self.model.add_d3_calculator(calc) for i in tqdm(range(0, 11961)): bh_forward_ref = 0 From 2c2ddb28627da4a147a4471bf865c64f01a1bb6e Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Mon, 19 Jan 2026 16:58:28 +0000 Subject: [PATCH 03/12] Add RDB7 Analysis --- .../molecular_reactions/rdb7/analysis_rdb7.py | 143 ++++++++++++++++++ .../molecular_reactions/rdb7/metrics.yml | 7 + 2 files changed, 150 insertions(+) create mode 100644 ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py create mode 100644 ml_peg/analysis/molecular_reactions/rdb7/metrics.yml diff --git a/ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py b/ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py new file mode 100644 index 000000000..e6941ee96 --- /dev/null +++ b/ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py @@ -0,0 +1,143 @@ +""" +Analyse the RDB7 reaction barrier dataset. + +Spiekermann, K., Pattanaik, L. & Green, W.H. +High accuracy barrier heights, enthalpies, +and rate coefficients for chemical reactions. +Sci Data 9, 417 (2022) +https://doi.org/10.1038/s41597-022-01529-6 +""" + +from __future__ import annotations + +from pathlib import Path + +from ase import units +from ase.io import read, write +import pytest + +from ml_peg.analysis.utils.decorators import build_table, plot_parity +from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_d3_name_map(MODELS) + +KCAL_TO_EV = units.kcal / units.mol +EV_TO_KCAL = 1 / KCAL_TO_EV +CALC_PATH = CALCS_ROOT / "molecular_reactions" / "rdb7" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "rdb7" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + return [str(i).zfill(6) for i in range(0, 11961)] + + +@pytest.fixture +@plot_parity( + filename=OUT_PATH / "figure_rdb7_barriers.json", + title="Reaction barriers", + x_label="Predicted barrier / kcal/mol", + y_label="Reference barrier / kcal/mol", + hoverdata={ + "Labels": labels(), + }, +) +def barrier_heights() -> dict[str, list]: + """ + Get barrier heights for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted barrier heights. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for label in labels(): + atoms = read(CALC_PATH / model_name / f"{label}_ts.xyz") + results[model_name].append(atoms.info["model_forward_barrier"] * EV_TO_KCAL) + if not ref_stored: + results["ref"].append(atoms.info["ref_forward_barrier"] * EV_TO_KCAL) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}_ts.xyz", atoms) + ref_stored = True + return results + + +@pytest.fixture +def get_mae(barrier_heights) -> dict[str, float]: + """ + Get mean absolute error for barrier heights. + + Parameters + ---------- + barrier_heights + Dictionary of reference and predicted barrier heights. + + Returns + ------- + dict[str, float] + Dictionary of predicted barrier height errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name]) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "rdb7_barriers_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return {"MAE": get_mae} + + +def test_rdb7_barriers(metrics: dict[str, dict]) -> None: + """ + Run rdb7_barriers test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + """ + return diff --git a/ml_peg/analysis/molecular_reactions/rdb7/metrics.yml b/ml_peg/analysis/molecular_reactions/rdb7/metrics.yml new file mode 100644 index 000000000..870ce5be4 --- /dev/null +++ b/ml_peg/analysis/molecular_reactions/rdb7/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 20.0 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: CCSD(T)-F12/cc-pVDZ-F12 From e3bd7de68963ca75f42707420c06f45d5e725723 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Tue, 20 Jan 2026 13:45:35 +0000 Subject: [PATCH 04/12] Rename benchmark to RDB7 --- .../{rdb7/analysis_rdb7.py => RDB7/analysis_RDB7.py} | 4 ++-- .../analysis/molecular_reactions/{rdb7 => RDB7}/metrics.yml | 0 .../calcs/molecular_reactions/{rdb7 => RDB7}/.dvc/.gitignore | 0 ml_peg/calcs/molecular_reactions/{rdb7 => RDB7}/.dvc/config | 0 ml_peg/calcs/molecular_reactions/{rdb7 => RDB7}/.dvcignore | 0 .../{rdb7/calc_rdb7.py => RDB7/calc_RDB7.py} | 0 6 files changed, 2 insertions(+), 2 deletions(-) rename ml_peg/analysis/molecular_reactions/{rdb7/analysis_rdb7.py => RDB7/analysis_RDB7.py} (96%) rename ml_peg/analysis/molecular_reactions/{rdb7 => RDB7}/metrics.yml (100%) rename ml_peg/calcs/molecular_reactions/{rdb7 => RDB7}/.dvc/.gitignore (100%) rename ml_peg/calcs/molecular_reactions/{rdb7 => RDB7}/.dvc/config (100%) rename ml_peg/calcs/molecular_reactions/{rdb7 => RDB7}/.dvcignore (100%) rename ml_peg/calcs/molecular_reactions/{rdb7/calc_rdb7.py => RDB7/calc_RDB7.py} (100%) diff --git a/ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py b/ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py similarity index 96% rename from ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py rename to ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py index e6941ee96..4d40f0237 100644 --- a/ml_peg/analysis/molecular_reactions/rdb7/analysis_rdb7.py +++ b/ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py @@ -28,8 +28,8 @@ KCAL_TO_EV = units.kcal / units.mol EV_TO_KCAL = 1 / KCAL_TO_EV -CALC_PATH = CALCS_ROOT / "molecular_reactions" / "rdb7" / "outputs" -OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "rdb7" +CALC_PATH = CALCS_ROOT / "molecular_reactions" / "RDB7" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( diff --git a/ml_peg/analysis/molecular_reactions/rdb7/metrics.yml b/ml_peg/analysis/molecular_reactions/RDB7/metrics.yml similarity index 100% rename from ml_peg/analysis/molecular_reactions/rdb7/metrics.yml rename to ml_peg/analysis/molecular_reactions/RDB7/metrics.yml diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore b/ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore similarity index 100% rename from ml_peg/calcs/molecular_reactions/rdb7/.dvc/.gitignore rename to ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvc/config b/ml_peg/calcs/molecular_reactions/RDB7/.dvc/config similarity index 100% rename from ml_peg/calcs/molecular_reactions/rdb7/.dvc/config rename to ml_peg/calcs/molecular_reactions/RDB7/.dvc/config diff --git a/ml_peg/calcs/molecular_reactions/rdb7/.dvcignore b/ml_peg/calcs/molecular_reactions/RDB7/.dvcignore similarity index 100% rename from ml_peg/calcs/molecular_reactions/rdb7/.dvcignore rename to ml_peg/calcs/molecular_reactions/RDB7/.dvcignore diff --git a/ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py similarity index 100% rename from ml_peg/calcs/molecular_reactions/rdb7/calc_rdb7.py rename to ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py From ec96bc5fadd2e1349111f2d6c50b51bfe59ba8e8 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Tue, 20 Jan 2026 14:12:00 +0000 Subject: [PATCH 05/12] Fix barrier calculation --- ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py index 9e08a6111..68181c43b 100644 --- a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py +++ b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py @@ -33,7 +33,7 @@ class RDB7Benchmark(zntrack.Node): - """Benchmark RDB7 reaction barriers from 10.1038/s41597-022-01529-6.""" + """Benchmark RDB7 reaction barriers.""" model: NodeWithCalculator = zntrack.deps() model_name: str = zntrack.params() @@ -124,17 +124,17 @@ def run(self): atoms.calc = calc bh_forward_model -= atoms.get_potential_energy() for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("ts*"): - bh_forward_model += self.get_cc_energy(qm_path) + bh_forward_ref += self.get_cc_energy(qm_path) atoms = self.get_atoms_from_molpro(qm_path) atoms.calc = calc bh_forward_model += atoms.get_potential_energy() - atoms.info["model_forward_barrier"] = bh_forward_model - atoms.info["ref_forward_barrier"] = bh_forward_ref + atoms.info["model_forward_barrier"] = bh_forward_model + atoms.info["ref_forward_barrier"] = bh_forward_ref - write_dir = OUT_PATH / self.model_name - write_dir.mkdir(parents=True, exist_ok=True) - write(write_dir / f"{label}_ts.xyz", atoms) + write_dir = OUT_PATH / self.model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}_ts.xyz", atoms) def build_project(repro: bool = False) -> None: From f1d1a89d4a722a24f04eccb6d38d3ac955525187 Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Tue, 20 Jan 2026 14:12:57 +0000 Subject: [PATCH 06/12] Fix analysis filename and labels --- .../RDB7/{analysis_RDB7.py => analyse_RDB7.py} | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) rename ml_peg/analysis/molecular_reactions/RDB7/{analysis_RDB7.py => analyse_RDB7.py} (92%) diff --git a/ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py similarity index 92% rename from ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py rename to ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py index 4d40f0237..6ddae633b 100644 --- a/ml_peg/analysis/molecular_reactions/RDB7/analysis_RDB7.py +++ b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py @@ -15,6 +15,7 @@ from ase import units from ase.io import read, write import pytest +from tqdm import tqdm from ml_peg.analysis.utils.decorators import build_table, plot_parity from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae @@ -46,7 +47,13 @@ def labels() -> list: list List of all system names. """ - return [str(i).zfill(6) for i in range(0, 11961)] + for model_name in MODELS: + labels_list = [ + path.stem.replace("_ts", "") + for path in sorted((CALC_PATH / model_name).glob("*_ts.xyz")) + ] + break + return labels_list @pytest.fixture @@ -72,7 +79,7 @@ def barrier_heights() -> dict[str, list]: ref_stored = False for model_name in MODELS: - for label in labels(): + for label in tqdm(labels()): atoms = read(CALC_PATH / model_name / f"{label}_ts.xyz") results[model_name].append(atoms.info["model_forward_barrier"] * EV_TO_KCAL) if not ref_stored: From 165e3ca02c8a7b68f6c5c8e7e9a8b2b75f890f8d Mon Sep 17 00:00:00 2001 From: Domantas Kuryla Date: Tue, 20 Jan 2026 14:13:47 +0000 Subject: [PATCH 07/12] Add RDB7 App --- .../app/molecular_reactions/RDB7/app_RDB7.py | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 ml_peg/app/molecular_reactions/RDB7/app_RDB7.py diff --git a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py new file mode 100644 index 000000000..0c3b8dcb0 --- /dev/null +++ b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py @@ -0,0 +1,90 @@ +"""Run RDB7 app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_column, + struct_from_scatter, +) +from ml_peg.app.utils.load import read_plot +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "RDB7" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#RDB7" +) +DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" + + +class RDB7App(BaseApp): + """RDB7 benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + scatter = read_plot( + DATA_PATH / "figure_rdb7_barriers.json", + id=f"{BENCHMARK_NAME}-figure", + ) + + model_dir = DATA_PATH / MODELS[0] + if model_dir.exists(): + labels = sorted([f.stem for f in model_dir.glob("*.xyz")]) + structs = [ + f"assets/molecular_reactions/RDB7/{MODELS[0]}/{label}.xyz" + for label in labels + ] + else: + structs = [] + + plot_from_table_column( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + column_to_plot={"MAE": scatter}, + ) + + struct_from_scatter( + scatter_id=f"{BENCHMARK_NAME}-figure", + struct_id=f"{BENCHMARK_NAME}-struct-placeholder", + structs=structs, + mode="struct", + ) + + +def get_app() -> RDB7App: + """ + Get RDB7 benchmark app layout and callback registration. + + Returns + ------- + RDB7App + Benchmark layout and callback registration. + """ + return RDB7App( + name=BENCHMARK_NAME, + description=( + "Performance in predicting barrier heights for the " + "RDB7 pericyclic reactions benchmark. " + "Reference data from CCSD(T)-F12/cc-pVDZ-F12 calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "rdb7_barriers_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8068, debug=True) From 9c0723af057e5774ef2f70c32dc27278e6f23114 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 23 Jan 2026 18:25:50 +0000 Subject: [PATCH 08/12] Remove mlipx --- .../molecular_reactions/RDB7/.dvc/.gitignore | 3 - .../molecular_reactions/RDB7/.dvc/config | 0 .../calcs/molecular_reactions/RDB7/.dvcignore | 3 - .../molecular_reactions/RDB7/calc_RDB7.py | 225 ++++++++---------- 4 files changed, 97 insertions(+), 134 deletions(-) delete mode 100644 ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore delete mode 100644 ml_peg/calcs/molecular_reactions/RDB7/.dvc/config delete mode 100644 ml_peg/calcs/molecular_reactions/RDB7/.dvcignore diff --git a/ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore b/ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore deleted file mode 100644 index 528f30c71..000000000 --- a/ml_peg/calcs/molecular_reactions/RDB7/.dvc/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/config.local -/tmp -/cache diff --git a/ml_peg/calcs/molecular_reactions/RDB7/.dvc/config b/ml_peg/calcs/molecular_reactions/RDB7/.dvc/config deleted file mode 100644 index e69de29bb..000000000 diff --git a/ml_peg/calcs/molecular_reactions/RDB7/.dvcignore b/ml_peg/calcs/molecular_reactions/RDB7/.dvcignore deleted file mode 100644 index 519730552..000000000 --- a/ml_peg/calcs/molecular_reactions/RDB7/.dvcignore +++ /dev/null @@ -1,3 +0,0 @@ -# Add patterns of files dvc should ignore, which could improve -# the performance. Learn more at -# https://dvc.org/doc/user-guide/dvcignore diff --git a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py index 68181c43b..33f58bfaa 100644 --- a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py +++ b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py @@ -11,16 +11,15 @@ from __future__ import annotations from pathlib import Path +from typing import Any from ase import Atom, Atoms, units from ase.io import write -import mlipx -from mlipx.abc import NodeWithCalculator import numpy as np +import pytest from tqdm import tqdm -import zntrack -from ml_peg.calcs.utils.utils import chdir, download_s3_data +from ml_peg.calcs.utils.utils import download_s3_data from ml_peg.models.get_models import load_models from ml_peg.models.models import current_models @@ -32,138 +31,108 @@ OUT_PATH = Path(__file__).parent / "outputs" -class RDB7Benchmark(zntrack.Node): - """Benchmark RDB7 reaction barriers.""" - - model: NodeWithCalculator = zntrack.deps() - model_name: str = zntrack.params() - - @staticmethod - def get_cc_energy(fname): - """ - Read reference energy. - - Parameters - ---------- - fname - Name of the calculation output file. - - Returns - ------- - float - CCSD(T)-F12/cc-pVDZ-F12 energy. - """ - with open(fname) as lines: - for line in lines: - if "CCSD(T)-F12/cc-pVDZ-F12 energy" in line: - energy = float(line.strip().split()[-1]) * units.Hartree - break - return energy - - @staticmethod - def get_atoms_from_molpro(fname): - """ - Get ASE atoms from the molpro file. - - Parameters - ---------- - fname - Name of the calculation output file. - - Returns - ------- - ASE.Atoms - ASE atoms object of the structure. - """ - atoms = Atoms(None) - with open(fname) as lines: - read_started = False - for i, line in enumerate(lines): - if "ATOMIC COORDINATES" in line: - read_started = True - xyz_start = i + 4 - if read_started: - if i >= xyz_start: - items = line.strip().split() - if len(items) == 0: - break - position = ( - np.array( - [float(items[3]), float(items[4]), float(items[5])] - ) - * units.Bohr - ) - atoms += Atom(symbol=items[1], position=position) - atoms.info["charge"] = 0 - atoms.info["spin"] = 1 - return atoms - - def run(self): - """Run new benchmark.""" - data_path = ( - download_s3_data( - filename="RDB7.zip", - key="inputs/molecular_reactions/RDB7/RDB7.zip", - ) - / "RDB7" - ) - - # Read in data and attach calculator +def get_cc_energy(fname): + """ + Read reference energy. - calc = self.model.get_calculator() - # Add D3 calculator for this test - calc = self.model.add_d3_calculator(calc) + Parameters + ---------- + fname + Name of the calculation output file. - for i in tqdm(range(0, 11961)): - bh_forward_ref = 0 - bh_forward_model = 0 - label = str(i).zfill(6) - for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("r*"): - bh_forward_ref -= self.get_cc_energy(qm_path) - atoms = self.get_atoms_from_molpro(qm_path) - atoms.calc = calc - bh_forward_model -= atoms.get_potential_energy() - for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("ts*"): - bh_forward_ref += self.get_cc_energy(qm_path) - atoms = self.get_atoms_from_molpro(qm_path) - atoms.calc = calc - bh_forward_model += atoms.get_potential_energy() + Returns + ------- + float + CCSD(T)-F12/cc-pVDZ-F12 energy. + """ + with open(fname) as lines: + for line in lines: + if "CCSD(T)-F12/cc-pVDZ-F12 energy" in line: + energy = float(line.strip().split()[-1]) * units.Hartree + break + return energy - atoms.info["model_forward_barrier"] = bh_forward_model - atoms.info["ref_forward_barrier"] = bh_forward_ref - write_dir = OUT_PATH / self.model_name - write_dir.mkdir(parents=True, exist_ok=True) - write(write_dir / f"{label}_ts.xyz", atoms) +def get_atoms_from_molpro(fname): + """ + Get ASE atoms from the molpro file. + Parameters + ---------- + fname + Name of the calculation output file. -def build_project(repro: bool = False) -> None: + Returns + ------- + ASE.Atoms + ASE atoms object of the structure. """ - Build mlipx project. + atoms = Atoms(None) + with open(fname) as lines: + read_started = False + for i, line in enumerate(lines): + if "ATOMIC COORDINATES" in line: + read_started = True + xyz_start = i + 4 + if read_started: + if i >= xyz_start: + items = line.strip().split() + if len(items) == 0: + break + position = ( + np.array([float(items[3]), float(items[4]), float(items[5])]) + * units.Bohr + ) + atoms += Atom(symbol=items[1], position=position) + atoms.info["charge"] = 0 + atoms.info["spin"] = 1 + return atoms + + +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_rdb87(mlip: tuple[str, Any]) -> None: + """ + Run RDB7 benchmark. Parameters ---------- - repro - Whether to call dvc repro -f after building. + mlip + Name of model use and model to get calculator. """ - project = mlipx.Project() - benchmark_node_dict = {} - - for model_name, model in MODELS.items(): - with project.group(model_name): - benchmark = RDB7Benchmark( - model=model, - model_name=model_name, - ) - benchmark_node_dict[model_name] = benchmark - - if repro: - with chdir(Path(__file__).parent): - project.repro(build=True, force=True) - else: - project.build() - - -def test_rdb7_barrier_heights(): - """Run RDB7 benchmark via pytest.""" - build_project(repro=True) + model_name, model = mlip + calc = model.get_calculator() + + data_path = ( + download_s3_data( + filename="RDB7.zip", + key="inputs/molecular_reactions/RDB7/RDB7.zip", + ) + / "RDB7" + ) + + # Read in data and attach calculator + calc = model.get_calculator() + # Add D3 calculator for this test + calc = model.add_d3_calculator(calc) + + for i in tqdm(range(0, 11961)): + bh_forward_ref = 0 + bh_forward_model = 0 + label = str(i).zfill(6) + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("r*"): + bh_forward_ref -= get_cc_energy(qm_path) + atoms = get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model -= atoms.get_potential_energy() + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("ts*"): + bh_forward_ref += get_cc_energy(qm_path) + atoms = get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model += atoms.get_potential_energy() + + atoms.info["model_forward_barrier"] = bh_forward_model + atoms.info["ref_forward_barrier"] = bh_forward_ref + + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}_ts.xyz", atoms) From 0176249e02ee4219c6ca702e46a6a8d8eea651ba Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Sun, 25 Jan 2026 17:49:33 +0000 Subject: [PATCH 09/12] Mark test as slow --- ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py index 33f58bfaa..fed525593 100644 --- a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py +++ b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py @@ -89,6 +89,7 @@ def get_atoms_from_molpro(fname): return atoms +@pytest.mark.slow @pytest.mark.parametrize("mlip", MODELS.items()) def test_rdb87(mlip: tuple[str, Any]) -> None: """ From ad25fc6b9f1b3cbc55ade423211c9538efa30a77 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:19:58 +0000 Subject: [PATCH 10/12] Tidy code --- ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py | 3 +-- ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py index 6ddae633b..7884a8b76 100644 --- a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py +++ b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py @@ -27,8 +27,7 @@ MODELS = load_models(current_models) D3_MODEL_NAMES = build_d3_name_map(MODELS) -KCAL_TO_EV = units.kcal / units.mol -EV_TO_KCAL = 1 / KCAL_TO_EV +EV_TO_KCAL = units.mol / units.kcal CALC_PATH = CALCS_ROOT / "molecular_reactions" / "RDB7" / "outputs" OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" diff --git a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py index fed525593..5495dce16 100644 --- a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py +++ b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py @@ -25,9 +25,6 @@ MODELS = load_models(current_models) -KCAL_TO_EV = units.kcal / units.mol -EV_TO_KCAL = 1 / KCAL_TO_EV - OUT_PATH = Path(__file__).parent / "outputs" From 8777ac3ceaf616b6cbf39f6989bcacf46efcb573 Mon Sep 17 00:00:00 2001 From: joehart2001 Date: Thu, 29 Jan 2026 22:42:04 +0000 Subject: [PATCH 11/12] add density plot --- .../molecular_reactions/RDB7/analyse_RDB7.py | 60 +++++++++++++++---- .../app/molecular_reactions/RDB7/app_RDB7.py | 47 +++++++-------- 2 files changed, 68 insertions(+), 39 deletions(-) diff --git a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py index 7884a8b76..6a78e26dc 100644 --- a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py +++ b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py @@ -11,14 +11,20 @@ from __future__ import annotations from pathlib import Path +from typing import Any from ase import units from ase.io import read, write import pytest from tqdm import tqdm -from ml_peg.analysis.utils.decorators import build_table, plot_parity -from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae +from ml_peg.analysis.utils.decorators import build_table, plot_density_scatter +from ml_peg.analysis.utils.utils import ( + build_d3_name_map, + build_density_inputs, + load_metrics_config, + mae, +) from ml_peg.app import APP_ROOT from ml_peg.calcs import CALCS_ROOT from ml_peg.models.get_models import load_models @@ -56,15 +62,6 @@ def labels() -> list: @pytest.fixture -@plot_parity( - filename=OUT_PATH / "figure_rdb7_barriers.json", - title="Reaction barriers", - x_label="Predicted barrier / kcal/mol", - y_label="Reference barrier / kcal/mol", - hoverdata={ - "Labels": labels(), - }, -) def barrier_heights() -> dict[str, list]: """ Get barrier heights for all systems. @@ -92,6 +89,40 @@ def barrier_heights() -> dict[str, list]: return results +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_barrier_density.json", + title="Reaction barrier density plot", + x_label="Reference reaction barrier / kcal/mol", + y_label="Predicted reaction barrier / kcal/mol", +) +def barrier_density(barrier_heights: dict[str, dict[str, Any]]) -> dict[str, dict]: + """ + Density scatter inputs for reaction barrier. + + Parameters + ---------- + barrier_heights + Aggregated barrier height data per model. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + stats_dict = { + model_name: { + "barrier": { + "ref": barrier_heights["ref"], + "pred": barrier_heights[model_name], + } + } + for model_name in MODELS + } + + return build_density_inputs(MODELS, stats_dict, "barrier", metric_fn=mae) + + @pytest.fixture def get_mae(barrier_heights) -> dict[str, float]: """ @@ -137,7 +168,10 @@ def metrics(get_mae: dict[str, float]) -> dict[str, dict]: return {"MAE": get_mae} -def test_rdb7_barriers(metrics: dict[str, dict]) -> None: +def test_rdb7_barriers( + metrics: dict[str, dict], + barrier_density: dict[str, dict], +) -> None: """ Run rdb7_barriers test. @@ -145,5 +179,7 @@ def test_rdb7_barriers(metrics: dict[str, dict]) -> None: ---------- metrics All new benchmark metric names and dictionary of values for each model. + barrier_density + Density scatter inputs for reaction barrier. """ return diff --git a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py index 0c3b8dcb0..4ab87caa1 100644 --- a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py +++ b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py @@ -8,10 +8,9 @@ from ml_peg.app import APP_ROOT from ml_peg.app.base_app import BaseApp from ml_peg.app.utils.build_callbacks import ( - plot_from_table_column, - struct_from_scatter, + plot_from_table_cell, ) -from ml_peg.app.utils.load import read_plot +from ml_peg.app.utils.load import read_density_plot_for_model from ml_peg.models.get_models import get_model_names from ml_peg.models.models import current_models @@ -28,32 +27,26 @@ class RDB7App(BaseApp): def register_callbacks(self) -> None: """Register callbacks to app.""" - scatter = read_plot( - DATA_PATH / "figure_rdb7_barriers.json", - id=f"{BENCHMARK_NAME}-figure", - ) - - model_dir = DATA_PATH / MODELS[0] - if model_dir.exists(): - labels = sorted([f.stem for f in model_dir.glob("*.xyz")]) - structs = [ - f"assets/molecular_reactions/RDB7/{MODELS[0]}/{label}.xyz" - for label in labels - ] - else: - structs = [] + # Build plots for models with data (read_density_plot_for_model + # returns None for models without data) + density_plots: dict[str, dict] = {} + for model in MODELS: + plots = { + "MAE": read_density_plot_for_model( + filename=DATA_PATH / "figure_barrier_density.json", + model=model, + id=f"{BENCHMARK_NAME}-{model}-barrier-figure", + ), + } + # Filter out None values (models without data for that metric) + model_plots = {k: v for k, v in plots.items() if v is not None} + if model_plots: + density_plots[model] = model_plots - plot_from_table_column( + plot_from_table_cell( table_id=self.table_id, plot_id=f"{BENCHMARK_NAME}-figure-placeholder", - column_to_plot={"MAE": scatter}, - ) - - struct_from_scatter( - scatter_id=f"{BENCHMARK_NAME}-figure", - struct_id=f"{BENCHMARK_NAME}-struct-placeholder", - structs=structs, - mode="struct", + cell_to_plot=density_plots, ) @@ -77,7 +70,7 @@ def get_app() -> RDB7App: table_path=DATA_PATH / "rdb7_barriers_metrics_table.json", extra_components=[ Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), - Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + # Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), ], ) From 9984d44867e585a7a8a40ef95f16297b3972cd37 Mon Sep 17 00:00:00 2001 From: ElliottKasoar <45317199+ElliottKasoar@users.noreply.github.com> Date: Fri, 30 Jan 2026 11:43:43 +0000 Subject: [PATCH 12/12] Tidy and add warning for missing data --- ml_peg/app/molecular_reactions/RDB7/app_RDB7.py | 2 +- ml_peg/app/utils/load.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py index 4ab87caa1..9a93c0c0a 100644 --- a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py +++ b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py @@ -17,7 +17,7 @@ MODELS = get_model_names(current_models) BENCHMARK_NAME = "RDB7" DOCS_URL = ( - "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#RDB7" + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#rdb7" ) DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" diff --git a/ml_peg/app/utils/load.py b/ml_peg/app/utils/load.py index b4f10d7cd..7af0223ad 100644 --- a/ml_peg/app/utils/load.py +++ b/ml_peg/app/utils/load.py @@ -5,6 +5,7 @@ from copy import deepcopy import json from pathlib import Path +from warnings import warn from dash.dash_table import DataTable from dash.dcc import Graph @@ -315,6 +316,7 @@ def read_density_plot_for_model( # Check if model has actual data (not just the reference line) # If only 1 trace (the y=x line) or 0 traces, model has no data if len(filtered_fig.get("data", [])) <= 1: + warn("No model data found", stacklevel=2) return None return Graph(id=id, figure=filtered_fig)