Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
185 changes: 185 additions & 0 deletions ml_peg/analysis/molecular_reactions/RDB7/analyse_RDB7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""
Analyse the RDB7 reaction barrier dataset.

Spiekermann, K., Pattanaik, L. & Green, W.H.
High accuracy barrier heights, enthalpies,
and rate coefficients for chemical reactions.
Sci Data 9, 417 (2022)
https://doi.org/10.1038/s41597-022-01529-6
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from ase import units
from ase.io import read, write
import pytest
from tqdm import tqdm

from ml_peg.analysis.utils.decorators import build_table, plot_density_scatter
from ml_peg.analysis.utils.utils import (
build_d3_name_map,
build_density_inputs,
load_metrics_config,
mae,
)
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)
D3_MODEL_NAMES = build_d3_name_map(MODELS)

EV_TO_KCAL = units.mol / units.kcal
CALC_PATH = CALCS_ROOT / "molecular_reactions" / "RDB7" / "outputs"
OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def labels() -> list:
"""
Get list of system names.

Returns
-------
list
List of all system names.
"""
for model_name in MODELS:
labels_list = [
path.stem.replace("_ts", "")
for path in sorted((CALC_PATH / model_name).glob("*_ts.xyz"))
]
break
return labels_list


@pytest.fixture
def barrier_heights() -> dict[str, list]:
"""
Get barrier heights for all systems.

Returns
-------
dict[str, list]
Dictionary of all reference and predicted barrier heights.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
for label in tqdm(labels()):
atoms = read(CALC_PATH / model_name / f"{label}_ts.xyz")
results[model_name].append(atoms.info["model_forward_barrier"] * EV_TO_KCAL)
if not ref_stored:
results["ref"].append(atoms.info["ref_forward_barrier"] * EV_TO_KCAL)

# Write structures for app
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)
write(structs_dir / f"{label}_ts.xyz", atoms)
ref_stored = True
return results


@pytest.fixture
@plot_density_scatter(
filename=OUT_PATH / "figure_barrier_density.json",
title="Reaction barrier density plot",
x_label="Reference reaction barrier / kcal/mol",
y_label="Predicted reaction barrier / kcal/mol",
)
def barrier_density(barrier_heights: dict[str, dict[str, Any]]) -> dict[str, dict]:
"""
Density scatter inputs for reaction barrier.

Parameters
----------
barrier_heights
Aggregated barrier height data per model.

Returns
-------
dict[str, dict]
Mapping of model name to density-scatter data.
"""
stats_dict = {
model_name: {
"barrier": {
"ref": barrier_heights["ref"],
"pred": barrier_heights[model_name],
}
}
for model_name in MODELS
}

return build_density_inputs(MODELS, stats_dict, "barrier", metric_fn=mae)


@pytest.fixture
def get_mae(barrier_heights) -> dict[str, float]:
"""
Get mean absolute error for barrier heights.

Parameters
----------
barrier_heights
Dictionary of reference and predicted barrier heights.

Returns
-------
dict[str, float]
Dictionary of predicted barrier height errors for all models.
"""
results = {}
for model_name in MODELS:
results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name])
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "rdb7_barriers_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=D3_MODEL_NAMES,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
"""
Get all metrics.

Parameters
----------
get_mae
Mean absolute errors for all models.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {"MAE": get_mae}


def test_rdb7_barriers(
metrics: dict[str, dict],
barrier_density: dict[str, dict],
) -> None:
"""
Run rdb7_barriers test.

Parameters
----------
metrics
All new benchmark metric names and dictionary of values for each model.
barrier_density
Density scatter inputs for reaction barrier.
"""
return
7 changes: 7 additions & 0 deletions ml_peg/analysis/molecular_reactions/RDB7/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
metrics:
MAE:
good: 0.0
bad: 20.0
unit: kcal/mol
tooltip: Mean Absolute Error for all systems
level_of_theory: CCSD(T)-F12/cc-pVDZ-F12
83 changes: 83 additions & 0 deletions ml_peg/app/molecular_reactions/RDB7/app_RDB7.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Run RDB7 app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import (
plot_from_table_cell,
)
from ml_peg.app.utils.load import read_density_plot_for_model
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

MODELS = get_model_names(current_models)
BENCHMARK_NAME = "RDB7"
DOCS_URL = (
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#rdb7"
)
DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "RDB7"


class RDB7App(BaseApp):
"""RDB7 benchmark app layout and callbacks."""

def register_callbacks(self) -> None:
"""Register callbacks to app."""
# Build plots for models with data (read_density_plot_for_model
# returns None for models without data)
density_plots: dict[str, dict] = {}
for model in MODELS:
plots = {
"MAE": read_density_plot_for_model(
filename=DATA_PATH / "figure_barrier_density.json",
model=model,
id=f"{BENCHMARK_NAME}-{model}-barrier-figure",
),
}
# Filter out None values (models without data for that metric)
model_plots = {k: v for k, v in plots.items() if v is not None}
if model_plots:
density_plots[model] = model_plots

plot_from_table_cell(
table_id=self.table_id,
plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
cell_to_plot=density_plots,
)


def get_app() -> RDB7App:
"""
Get RDB7 benchmark app layout and callback registration.

Returns
-------
RDB7App
Benchmark layout and callback registration.
"""
return RDB7App(
name=BENCHMARK_NAME,
description=(
"Performance in predicting barrier heights for the "
"RDB7 pericyclic reactions benchmark. "
"Reference data from CCSD(T)-F12/cc-pVDZ-F12 calculations."
),
docs_url=DOCS_URL,
table_path=DATA_PATH / "rdb7_barriers_metrics_table.json",
extra_components=[
Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
# Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
],
)


if __name__ == "__main__":
full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
benchmark_app = get_app()
full_app.layout = benchmark_app.layout
benchmark_app.register_callbacks()
full_app.run(port=8068, debug=True)
2 changes: 2 additions & 0 deletions ml_peg/app/utils/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from copy import deepcopy
import json
from pathlib import Path
from warnings import warn

from dash.dash_table import DataTable
from dash.dcc import Graph
Expand Down Expand Up @@ -315,6 +316,7 @@ def read_density_plot_for_model(
# Check if model has actual data (not just the reference line)
# If only 1 trace (the y=x line) or 0 traces, model has no data
if len(filtered_fig.get("data", [])) <= 1:
warn("No model data found", stacklevel=2)
return None

return Graph(id=id, figure=filtered_fig)
Loading