diff --git a/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py new file mode 100644 index 00000000..6a78e26d --- /dev/null +++ b/ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py @@ -0,0 +1,185 @@ +""" +Analyse the RDB7 reaction barrier dataset. + +Spiekermann, K., Pattanaik, L. & Green, W.H. +High accuracy barrier heights, enthalpies, +and rate coefficients for chemical reactions. +Sci Data 9, 417 (2022) +https://doi.org/10.1038/s41597-022-01529-6 +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase import units +from ase.io import read, write +import pytest +from tqdm import tqdm + +from ml_peg.analysis.utils.decorators import build_table, plot_density_scatter +from ml_peg.analysis.utils.utils import ( + build_d3_name_map, + build_density_inputs, + load_metrics_config, + mae, +) +from ml_peg.app import APP_ROOT +from ml_peg.calcs import CALCS_ROOT +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) +D3_MODEL_NAMES = build_d3_name_map(MODELS) + +EV_TO_KCAL = units.mol / units.kcal +CALC_PATH = CALCS_ROOT / "molecular_reactions" / "RDB7" / "outputs" +OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" + +METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") +DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( + METRICS_CONFIG_PATH +) + + +def labels() -> list: + """ + Get list of system names. + + Returns + ------- + list + List of all system names. + """ + for model_name in MODELS: + labels_list = [ + path.stem.replace("_ts", "") + for path in sorted((CALC_PATH / model_name).glob("*_ts.xyz")) + ] + break + return labels_list + + +@pytest.fixture +def barrier_heights() -> dict[str, list]: + """ + Get barrier heights for all systems. + + Returns + ------- + dict[str, list] + Dictionary of all reference and predicted barrier heights. + """ + results = {"ref": []} | {mlip: [] for mlip in MODELS} + ref_stored = False + + for model_name in MODELS: + for label in tqdm(labels()): + atoms = read(CALC_PATH / model_name / f"{label}_ts.xyz") + results[model_name].append(atoms.info["model_forward_barrier"] * EV_TO_KCAL) + if not ref_stored: + results["ref"].append(atoms.info["ref_forward_barrier"] * EV_TO_KCAL) + + # Write structures for app + structs_dir = OUT_PATH / model_name + structs_dir.mkdir(parents=True, exist_ok=True) + write(structs_dir / f"{label}_ts.xyz", atoms) + ref_stored = True + return results + + +@pytest.fixture +@plot_density_scatter( + filename=OUT_PATH / "figure_barrier_density.json", + title="Reaction barrier density plot", + x_label="Reference reaction barrier / kcal/mol", + y_label="Predicted reaction barrier / kcal/mol", +) +def barrier_density(barrier_heights: dict[str, dict[str, Any]]) -> dict[str, dict]: + """ + Density scatter inputs for reaction barrier. + + Parameters + ---------- + barrier_heights + Aggregated barrier height data per model. + + Returns + ------- + dict[str, dict] + Mapping of model name to density-scatter data. + """ + stats_dict = { + model_name: { + "barrier": { + "ref": barrier_heights["ref"], + "pred": barrier_heights[model_name], + } + } + for model_name in MODELS + } + + return build_density_inputs(MODELS, stats_dict, "barrier", metric_fn=mae) + + +@pytest.fixture +def get_mae(barrier_heights) -> dict[str, float]: + """ + Get mean absolute error for barrier heights. + + Parameters + ---------- + barrier_heights + Dictionary of reference and predicted barrier heights. + + Returns + ------- + dict[str, float] + Dictionary of predicted barrier height errors for all models. + """ + results = {} + for model_name in MODELS: + results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name]) + return results + + +@pytest.fixture +@build_table( + filename=OUT_PATH / "rdb7_barriers_metrics_table.json", + metric_tooltips=DEFAULT_TOOLTIPS, + thresholds=DEFAULT_THRESHOLDS, + mlip_name_map=D3_MODEL_NAMES, +) +def metrics(get_mae: dict[str, float]) -> dict[str, dict]: + """ + Get all metrics. + + Parameters + ---------- + get_mae + Mean absolute errors for all models. + + Returns + ------- + dict[str, dict] + Metric names and values for all models. + """ + return {"MAE": get_mae} + + +def test_rdb7_barriers( + metrics: dict[str, dict], + barrier_density: dict[str, dict], +) -> None: + """ + Run rdb7_barriers test. + + Parameters + ---------- + metrics + All new benchmark metric names and dictionary of values for each model. + barrier_density + Density scatter inputs for reaction barrier. + """ + return diff --git a/ml_peg/analysis/molecular_reactions/RDB7/metrics.yml b/ml_peg/analysis/molecular_reactions/RDB7/metrics.yml new file mode 100644 index 00000000..870ce5be --- /dev/null +++ b/ml_peg/analysis/molecular_reactions/RDB7/metrics.yml @@ -0,0 +1,7 @@ +metrics: + MAE: + good: 0.0 + bad: 20.0 + unit: kcal/mol + tooltip: Mean Absolute Error for all systems + level_of_theory: CCSD(T)-F12/cc-pVDZ-F12 diff --git a/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py new file mode 100644 index 00000000..9a93c0c0 --- /dev/null +++ b/ml_peg/app/molecular_reactions/RDB7/app_RDB7.py @@ -0,0 +1,83 @@ +"""Run RDB7 app.""" + +from __future__ import annotations + +from dash import Dash +from dash.html import Div + +from ml_peg.app import APP_ROOT +from ml_peg.app.base_app import BaseApp +from ml_peg.app.utils.build_callbacks import ( + plot_from_table_cell, +) +from ml_peg.app.utils.load import read_density_plot_for_model +from ml_peg.models.get_models import get_model_names +from ml_peg.models.models import current_models + +MODELS = get_model_names(current_models) +BENCHMARK_NAME = "RDB7" +DOCS_URL = ( + "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#rdb7" +) +DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7" + + +class RDB7App(BaseApp): + """RDB7 benchmark app layout and callbacks.""" + + def register_callbacks(self) -> None: + """Register callbacks to app.""" + # Build plots for models with data (read_density_plot_for_model + # returns None for models without data) + density_plots: dict[str, dict] = {} + for model in MODELS: + plots = { + "MAE": read_density_plot_for_model( + filename=DATA_PATH / "figure_barrier_density.json", + model=model, + id=f"{BENCHMARK_NAME}-{model}-barrier-figure", + ), + } + # Filter out None values (models without data for that metric) + model_plots = {k: v for k, v in plots.items() if v is not None} + if model_plots: + density_plots[model] = model_plots + + plot_from_table_cell( + table_id=self.table_id, + plot_id=f"{BENCHMARK_NAME}-figure-placeholder", + cell_to_plot=density_plots, + ) + + +def get_app() -> RDB7App: + """ + Get RDB7 benchmark app layout and callback registration. + + Returns + ------- + RDB7App + Benchmark layout and callback registration. + """ + return RDB7App( + name=BENCHMARK_NAME, + description=( + "Performance in predicting barrier heights for the " + "RDB7 pericyclic reactions benchmark. " + "Reference data from CCSD(T)-F12/cc-pVDZ-F12 calculations." + ), + docs_url=DOCS_URL, + table_path=DATA_PATH / "rdb7_barriers_metrics_table.json", + extra_components=[ + Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), + # Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), + ], + ) + + +if __name__ == "__main__": + full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) + benchmark_app = get_app() + full_app.layout = benchmark_app.layout + benchmark_app.register_callbacks() + full_app.run(port=8068, debug=True) diff --git a/ml_peg/app/utils/load.py b/ml_peg/app/utils/load.py index b4f10d7c..7af0223a 100644 --- a/ml_peg/app/utils/load.py +++ b/ml_peg/app/utils/load.py @@ -5,6 +5,7 @@ from copy import deepcopy import json from pathlib import Path +from warnings import warn from dash.dash_table import DataTable from dash.dcc import Graph @@ -315,6 +316,7 @@ def read_density_plot_for_model( # Check if model has actual data (not just the reference line) # If only 1 trace (the y=x line) or 0 traces, model has no data if len(filtered_fig.get("data", [])) <= 1: + warn("No model data found", stacklevel=2) return None return Graph(id=id, figure=filtered_fig) diff --git a/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py new file mode 100644 index 00000000..5495dce1 --- /dev/null +++ b/ml_peg/calcs/molecular_reactions/RDB7/calc_RDB7.py @@ -0,0 +1,136 @@ +""" +Calculate the RDB7 reaction barrier dataset. + +Spiekermann, K., Pattanaik, L. & Green, W.H. +High accuracy barrier heights, enthalpies, +and rate coefficients for chemical reactions. +Sci Data 9, 417 (2022) +https://doi.org/10.1038/s41597-022-01529-6 +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from ase import Atom, Atoms, units +from ase.io import write +import numpy as np +import pytest +from tqdm import tqdm + +from ml_peg.calcs.utils.utils import download_s3_data +from ml_peg.models.get_models import load_models +from ml_peg.models.models import current_models + +MODELS = load_models(current_models) + +OUT_PATH = Path(__file__).parent / "outputs" + + +def get_cc_energy(fname): + """ + Read reference energy. + + Parameters + ---------- + fname + Name of the calculation output file. + + Returns + ------- + float + CCSD(T)-F12/cc-pVDZ-F12 energy. + """ + with open(fname) as lines: + for line in lines: + if "CCSD(T)-F12/cc-pVDZ-F12 energy" in line: + energy = float(line.strip().split()[-1]) * units.Hartree + break + return energy + + +def get_atoms_from_molpro(fname): + """ + Get ASE atoms from the molpro file. + + Parameters + ---------- + fname + Name of the calculation output file. + + Returns + ------- + ASE.Atoms + ASE atoms object of the structure. + """ + atoms = Atoms(None) + with open(fname) as lines: + read_started = False + for i, line in enumerate(lines): + if "ATOMIC COORDINATES" in line: + read_started = True + xyz_start = i + 4 + if read_started: + if i >= xyz_start: + items = line.strip().split() + if len(items) == 0: + break + position = ( + np.array([float(items[3]), float(items[4]), float(items[5])]) + * units.Bohr + ) + atoms += Atom(symbol=items[1], position=position) + atoms.info["charge"] = 0 + atoms.info["spin"] = 1 + return atoms + + +@pytest.mark.slow +@pytest.mark.parametrize("mlip", MODELS.items()) +def test_rdb87(mlip: tuple[str, Any]) -> None: + """ + Run RDB7 benchmark. + + Parameters + ---------- + mlip + Name of model use and model to get calculator. + """ + model_name, model = mlip + calc = model.get_calculator() + + data_path = ( + download_s3_data( + filename="RDB7.zip", + key="inputs/molecular_reactions/RDB7/RDB7.zip", + ) + / "RDB7" + ) + + # Read in data and attach calculator + calc = model.get_calculator() + # Add D3 calculator for this test + calc = model.add_d3_calculator(calc) + + for i in tqdm(range(0, 11961)): + bh_forward_ref = 0 + bh_forward_model = 0 + label = str(i).zfill(6) + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("r*"): + bh_forward_ref -= get_cc_energy(qm_path) + atoms = get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model -= atoms.get_potential_energy() + for qm_path in (data_path / "qm_logs" / f"rxn{label}").glob("ts*"): + bh_forward_ref += get_cc_energy(qm_path) + atoms = get_atoms_from_molpro(qm_path) + atoms.calc = calc + bh_forward_model += atoms.get_potential_energy() + + atoms.info["model_forward_barrier"] = bh_forward_model + atoms.info["ref_forward_barrier"] = bh_forward_ref + + write_dir = OUT_PATH / model_name + write_dir.mkdir(parents=True, exist_ok=True) + write(write_dir / f"{label}_ts.xyz", atoms)