Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Analyse Criegee22 reaction barrier benchmark.

10.1021/acs.jpca.8b09349
"""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)

EV_TO_KCAL = units.mol / units.kcal
CALC_PATH = CALCS_ROOT / "molecular_reactions" / "Criegee22" / "outputs"
OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "Criegee22"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def labels() -> list:
"""
Get list of system names.

Returns
-------
list
List of all system names.
"""
for model_name in MODELS:
labels_list = [
path.stem for path in sorted((CALC_PATH / model_name).glob("*.xyz"))
]
break
return labels_list


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_criegee22_barriers.json",
title="Reaction barriers",
x_label="Predicted barrier / kcal/mol",
y_label="Reference barrier / kcal/mol",
hoverdata={
"Labels": labels(),
},
)
def barrier_heights() -> dict[str, list]:
"""
Get barrier heights for all systems.

Returns
-------
dict[str, list]
Dictionary of all reference and predicted barrier heights.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
for label in labels():
structs = read(CALC_PATH / model_name / f"{label}.xyz", index=":")

results[model_name].append(
(structs[1].info["model_energy"] - structs[0].info["model_energy"])
* EV_TO_KCAL
)
if not ref_stored:
results["ref"].append(
(structs[1].info["ref_energy"] - structs[0].info["ref_energy"])
* EV_TO_KCAL
)

# Write structures for app
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)
write(structs_dir / f"{label}_rct.xyz", structs)
ref_stored = True
return results


@pytest.fixture
def get_mae(barrier_heights) -> dict[str, float]:
"""
Get mean absolute error for barrier heights.

Parameters
----------
barrier_heights
Dictionary of reference and predicted barrier heights.

Returns
-------
dict[str, float]
Dictionary of predicted barrier height errors for all models.
"""
results = {}
for model_name in MODELS:
results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name])
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "criegee22_barriers_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=D3_MODEL_NAMES,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
"""
Get all metrics.

Parameters
----------
get_mae
Mean absolute errors for all models.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {"MAE": get_mae}


def test_criegee22_barriers(metrics: dict[str, dict]) -> None:
"""
Run Criegee22 barriers test.

Parameters
----------
metrics
All new benchmark metric names and dictionary of values for each model.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/molecular_reactions/Criegee22/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
MAE:
good: 0.0
bad: 20.0
unit: kcal/mol
tooltip: Mean Absolute Error for all systems
level_of_theory: CCSDT(Q)/CBS
90 changes: 90 additions & 0 deletions ml_peg/app/molecular_reactions/Criegee22/app_Criegee22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Run Criegee22 app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_column,
struct_from_scatter,
)
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
BENCHMARK_NAME = "Criegee22"
DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#Criegee22"
DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "Criegee22"


class Criegee22App(BaseApp):
"""Criegee22 benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
scatter = read_plot(
DATA_PATH / "figure_criegee22_barriers.json",
id=f"{BENCHMARK_NAME}-figure",
)

model_dir = DATA_PATH / MODELS[0]
if model_dir.exists():
base_labels = sorted(
[f.stem.rsplit("_", 1)[0] for f in model_dir.glob("*_rct.xyz")]
)
structs = [
Comment thread
ElliottKasoar marked this conversation as resolved.
f"assets/molecular_reactions/Criegee22/{MODELS[0]}/{label}_rct.xyz"
for label in base_labels
]
else:
structs = []

plot_from_table_column(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
column_to_plot={"MAE": scatter},
)

struct_from_scatter(
scatter_id=f"{BENCHMARK_NAME}-figure",
struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
structs=structs,
mode="struct",
)


def get_app() -> Criegee22App:
"""
Get Criegee22 benchmark app layout and callback registration.

Returns
-------
Criegee22App
Benchmark layout and callback registration.
"""
return Criegee22App(
name=BENCHMARK_NAME,
description=(
"Performance in predicting barrier heights for the "
"Criegee22 reaction benchmark. "
"Reference data from CCSDT(Q) calculations."
),
docs_url=DOCS_URL,
table_path=DATA_PATH / "criegee22_barriers_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
],
)


if __name__ == "__main__":
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
benchmark_app = get_app()
full_app.layout = benchmark_app.layout
benchmark_app.register_callbacks()
full_app.run(port=8068, debug=True)
78 changes: 78 additions & 0 deletions ml_peg/calcs/molecular_reactions/Criegee22/calc_Criegee22.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""
Calculate the Criegee22 dataset for reactions involving Crieegee intermediates.

C. D. Smith, A. Karton. J. Comput. Chem. 2020, 41, 328–339.
DOI: 10.1002/jcc.26106
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from ase import units
from ase.io import read, write
import pytest
from tqdm import tqdm

from ml_peg.calcs.utils.utils import download_s3_data
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)

KJ_TO_EV = units.kJ / units.mol

OUT_PATH = Path(__file__).parent / "outputs"


@pytest.mark.parametrize("mlip", MODELS.items())
def test_criegee22(mlip: tuple[str, Any]) -> None:
"""
Run Criegee22 benchmark.

Parameters
----------
mlip
Name of model use and model to get calculator.
"""
model_name, model = mlip
calc = model.get_calculator()

data_path = (
download_s3_data(
filename="Criegee22.zip",
key="inputs/molecular_reactions/Criegee22/Criegee22.zip",
)
/ "Criegee22"
)

# Read in data and attach calculator
calc = model.get_calculator()
# Add D3 calculator for this test
calc = model.add_d3_calculator(calc)

with open(data_path / "reference.txt") as lines:
# Skip header
next(lines)
for line in tqdm(lines, total=22):
items = line.strip().split()
label = items[0]
bh_ref = float(items[8]) * KJ_TO_EV
atoms_reac = read(data_path / "structures" / f"{label}-reac.xyz")
atoms_reac.calc = calc
atoms_reac.info["charge"] = 0
atoms_reac.info["spin"] = 1
atoms_reac.info["model_energy"] = atoms_reac.get_potential_energy()
atoms_reac.info["ref_energy"] = 0

atoms_ts = read(data_path / "structures" / f"{label}-TS.xyz")
atoms_ts.calc = calc
atoms_ts.info["charge"] = 0
atoms_ts.info["spin"] = 1
atoms_ts.info["model_energy"] = atoms_ts.get_potential_energy()
atoms_ts.info["ref_energy"] = bh_ref

write_dir = OUT_PATH / model_name
write_dir.mkdir(parents=True, exist_ok=True)
write(write_dir / f"{label}.xyz", [atoms_reac, atoms_ts])