Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/user_guide/benchmarks/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ Benchmarks
molecular_crystal
molecular
bulk_crystal
lanthanides
non_covalent_interactions
45 changes: 45 additions & 0 deletions docs/source/user_guide/benchmarks/lanthanides.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
===========
Lanthanides
===========

Isomer complexes
================

Summary
-------

Performance in predicting relative isomer energies for lanthanide complexes
compared to r2SCAN-3c DFT reference data.


Metrics
-------

1. Relative isomer energy MAE

Mean absolute error (MAE), in kcal/mol, of the predicted relative isomer energies.

For each complex, the relative isomer energies are computed with respect to the
lowest-energy isomer in the r2SCAN-3c reference set and compared to the r2SCAN-3c
relative energies reported in the reference dataset.


Computational cost
------------------

Low: tests are likely to take less than a minute to run on CPU.


Data availability
-----------------

Input structures:

* T. Rose, M. Bursch, J.-M. Mewes, and S. Grimme, Fast and Robust Modeling of
Lanthanide and Actinide Complexes, Biomolecules, and Molecular Crystals with
the Extended GFN-FF Model, Inorganic Chemistry 63 (2024) 19364-19374.

Reference data:

* Relative isomer energies from r2SCAN-3c (see Supporting Information of the
above reference).
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
"""Analyse lanthanide isomer complex benchmark."""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import load_metrics_config, mae
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

# Names of the models to analyse, derived from the currently configured models
MODELS = get_model_names(current_models)
# Per-model calculation outputs for this benchmark are read from here
CALC_PATH = CALCS_ROOT / "lanthanides" / "isomer_complexes" / "outputs"
# Analysis outputs (figure, metrics table, copied structures) are written here
OUT_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"

# Metric thresholds, tooltips and weights are loaded from the metrics.yml file
# that sits next to this module
METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)

# Multiplicative factor converting energies from eV to kcal/mol (ASE units)
EV_TO_KCAL = units.mol / units.kcal


def get_system_names() -> list[str]:
    """
    List the systems found in the first available model output directory.

    Returns
    -------
    list[str]
        Sorted, de-duplicated system names, or an empty list if no model
        output directory exists.
    """
    first_dir = next(
        (
            CALC_PATH / model_name
            for model_name in MODELS
            if (CALC_PATH / model_name).exists()
        ),
        None,
    )
    if first_dir is None:
        return []

    # Drop everything from "_iso" onwards, leaving only the system name
    systems = set()
    for xyz_file in first_dir.glob("*.xyz"):
        systems.add(xyz_file.stem.split("_iso")[0])
    return sorted(systems)


def get_labels() -> dict[str, list[str]]:
    """
    Get isomer labels for each system, for consistent ordering.

    Returns
    -------
    dict[str, list[str]]
        Mapping of system name to its sorted isomer labels (the part of the
        file stem after ``"<system>_"``), with systems inserted in sorted
        order. Empty if no model output directory exists.
    """
    # System names do not depend on the model being inspected, so look them up once
    systems = get_system_names()

    labels: dict[str, list[str]] = {}
    for model_name in MODELS:
        model_dir = CALC_PATH / model_name
        if not model_dir.exists():
            continue
        for system in systems:
            # Strip the "<system>_" prefix instead of indexing a fixed "_" field,
            # so system names with any number of underscores are handled
            labels[system] = sorted(
                path.stem.removeprefix(f"{system}_")
                for path in model_dir.glob(f"{system}_iso*.xyz")
            )
    return labels


def build_hoverdata() -> dict[str, list[str]]:
    """
    Assemble per-point hover labels for the parity plot.

    Returns
    -------
    dict[str, list[str]]
        "System" and "Isomer" lists of equal length, with one entry per
        (system, isomer) pair in the order given by ``get_labels``.
    """
    system_column: list[str] = []
    isomer_column: list[str] = []
    for system, isomers in get_labels().items():
        # Repeat the system name once for each of its isomers
        system_column.extend([system] * len(isomers))
        isomer_column.extend(isomers)
    return {"System": system_column, "Isomer": isomer_column}


@pytest.fixture
@plot_parity(
    filename=OUT_PATH / "figure_isomer_complexes.json",
    title="Lanthanide isomer relative energies",
    x_label="Model Delta E (kcal/mol)",
    y_label="r2SCAN-3c Delta E (kcal/mol)",
    hoverdata=build_hoverdata(),
)
def isomer_relative_energies() -> dict[str, list]:
    """
    Build parity data for lanthanide isomer complexes benchmark.

    Each structure that is read is also written to the model's directory under
    the app data path.

    Returns
    -------
    dict[str, list]
        Reference ("ref") and per-model relative energies, ordered by system
        then isomer. Models without an output directory hold ``None`` for
        every entry.

    Raises
    ------
    KeyError
        If a structure lacks the "model_energy" or "ref_energy" info entry.
    """
    results = {"ref": []} | {mlip: [] for mlip in MODELS}
    ref_stored = False

    # Labels are the same for every model, so build them once
    labels = get_labels()
    # One entry per (system, isomer) pair, so placeholder lists stay aligned
    # with the reference list
    n_entries = sum(len(isomers) for isomers in labels.values())

    for model_name in MODELS:
        model_dir = CALC_PATH / model_name
        if not model_dir.exists():
            # Model directory doesn't exist, fill every entry with None
            results[model_name] = [None] * n_entries
            continue

        structs_dir = OUT_PATH / model_name
        structs_dir.mkdir(parents=True, exist_ok=True)

        for system, isomers in labels.items():
            pred_energies = []
            ref_energies = []
            for isomer in isomers:
                atoms = read(model_dir / f"{system}_{isomer}.xyz")

                # Index directly so a missing entry gives a clear KeyError
                # (assumes stored energies are in eV -- TODO confirm)
                pred_energies.append(atoms.info["model_energy"] * EV_TO_KCAL)
                ref_energies.append(atoms.info["ref_energy"] * EV_TO_KCAL)

                # Copy structure to app directory
                write(structs_dir / f"{system}_{isomer}.xyz", atoms)

            # NOTE(review): model energies are taken relative to the model's own
            # lowest-energy isomer, which may differ from the isomer that is
            # lowest in the reference data -- confirm this is intended
            min_pred_energy = min(pred_energies)
            results[model_name].extend(
                energy - min_pred_energy for energy in pred_energies
            )

            # Reference values are identical for all models, so store them once
            if not ref_stored:
                min_ref_energy = min(ref_energies)
                results["ref"].extend(
                    energy - min_ref_energy for energy in ref_energies
                )

        ref_stored = True

    return results


@pytest.fixture
def isomer_complex_errors(isomer_relative_energies) -> dict[str, float | None]:
    """
    Compute the relative energy mean absolute error for each model.

    Parameters
    ----------
    isomer_relative_energies
        Dictionary of reference ("ref") and per-model relative energies.

    Returns
    -------
    dict[str, float | None]
        MAE for each model, or ``None`` where a model has no predictions.
    """
    errors: dict[str, float | None] = {}
    for model_name in MODELS:
        predictions = isomer_relative_energies.get(model_name, [])

        # Keep only the entries for which the model produced a prediction
        kept_refs = []
        kept_preds = []
        for ref_value, pred_value in zip(
            isomer_relative_energies["ref"], predictions, strict=True
        ):
            if pred_value is None:
                continue
            kept_refs.append(ref_value)
            kept_preds.append(pred_value)

        if kept_preds:
            errors[model_name] = mae(kept_refs, kept_preds)
        else:
            errors[model_name] = None
    return errors


@pytest.fixture
@build_table(
    filename=OUT_PATH / "isomer_complexes_metrics_table.json",
    metric_tooltips=DEFAULT_TOOLTIPS,
    thresholds=DEFAULT_THRESHOLDS,
    weights=DEFAULT_WEIGHTS,
)
def metrics(isomer_complex_errors: dict[str, float | None]) -> dict[str, dict]:
    """
    Gather all metrics for the lanthanide isomer complexes benchmark.

    Parameters
    ----------
    isomer_complex_errors
        Mean absolute error of relative isomer energies for each model.

    Returns
    -------
    dict[str, dict]
        Per-model values keyed by metric name.
    """
    all_metrics = {"MAE": isomer_complex_errors}
    return all_metrics


def test_isomer_complexes(metrics: dict[str, dict]) -> None:
    """
    Trigger the lanthanide isomer complexes analysis.

    The function body is intentionally empty: requesting the ``metrics``
    fixture is what runs the analysis.

    Parameters
    ----------
    metrics
        All lanthanide isomer complex metrics.
    """
7 changes: 7 additions & 0 deletions ml_peg/analysis/lanthanides/isomer_complexes/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Scoring configuration for the lanthanide isomer complexes benchmark:
# "good" and "bad" MAE thresholds, display unit, tooltip and reference level of theory
metrics:
MAE:
good: 0.0
bad: 10.0
unit: kcal/mol
tooltip: Mean absolute error for relative isomer energies
level_of_theory: r2SCAN-3c
89 changes: 89 additions & 0 deletions ml_peg/app/lanthanides/isomer_complexes/app_isomer_complexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Run lanthanide isomer complex benchmark app."""

from __future__ import annotations

from dash import Dash
from dash.html import Div

from ml_peg.app import APP_ROOT
from ml_peg.app.base_app import BaseApp
from ml_peg.app.utils.build_callbacks import plot_from_table_column, struct_from_scatter
from ml_peg.app.utils.load import read_plot
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

# Names of the currently configured models; the first one supplies the structures shown
MODELS = get_model_names(current_models)
# Display name, also used as the prefix for this benchmark's component IDs
BENCHMARK_NAME = "Lanthanide Isomer Complexes"
# Link to the "Isomer complexes" section of the lanthanides user guide page
DOCS_URL = (
"https://ddmms.github.io/ml-peg/user_guide/benchmarks/lanthanides.html"
"#isomer-complexes"
)
# Directory containing the figure, metrics table and per-model structure files
DATA_PATH = APP_ROOT / "data" / "lanthanides" / "isomer_complexes"


class IsomerComplexesApp(BaseApp):
    """Layout and callbacks for the lanthanide isomer complex benchmark app."""

    def register_callbacks(self) -> None:
        """Register the parity plot and structure viewer callbacks."""
        figure_id = f"{BENCHMARK_NAME}-figure"
        scatter = read_plot(DATA_PATH / "figure_isomer_complexes.json", id=figure_id)

        # Tie the parity plot to the "MAE" column of the metrics table
        plot_from_table_column(
            table_id=self.table_id,
            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
            column_to_plot={"MAE": scatter},
        )

        # Without any models there are no structures to display
        if not MODELS:
            return

        # Structures are taken from the first model's directory only
        first_model = MODELS[0]
        asset_prefix = f"assets/lanthanides/isomer_complexes/{first_model}"
        structs = []
        for struct_file in sorted((DATA_PATH / first_model).glob("*.xyz")):
            structs.append(f"{asset_prefix}/{struct_file.stem}.xyz")

        struct_from_scatter(
            scatter_id=figure_id,
            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
            structs=structs,
            mode="struct",
        )


def get_app() -> IsomerComplexesApp:
    """
    Create the lanthanide isomer complex benchmark app.

    Returns
    -------
    IsomerComplexesApp
        Benchmark layout and callback registration.
    """
    # Empty containers with IDs for the parity figure and the structure view
    placeholders = [
        Div(id=f"{BENCHMARK_NAME}-{suffix}-placeholder")
        for suffix in ("figure", "struct")
    ]
    description = (
        "Relative energies of lanthanide isomer complexes compared to r2SCAN-3c."
    )
    return IsomerComplexesApp(
        name=BENCHMARK_NAME,
        description=description,
        docs_url=DOCS_URL,
        table_path=DATA_PATH / "isomer_complexes_metrics_table.json",
        extra_components=placeholders,
    )


if __name__ == "__main__":
    # Standalone entry point serving only this benchmark.
    # Assets are rooted two levels above DATA_PATH, presumably so that the
    # "assets/lanthanides/isomer_complexes/..." structure paths resolve.
    dash_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)

    # Build the benchmark layout, attach it, then register its callbacks
    benchmark_app = get_app()
    dash_app.layout = benchmark_app.layout
    benchmark_app.register_callbacks()

    # Start the server in debug mode
    dash_app.run(port=8061, debug=True)
2 changes: 2 additions & 0 deletions ml_peg/app/lanthanides/lanthanides.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
title: Lanthanides
description: Relative energies for lanthanide isomer complexes
Loading