-
Notifications
You must be signed in to change notification settings - Fork 42
Criegee22 Benchmark #256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Criegee22 Benchmark #256
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
e10bc34
Add Criegee22 calculation files
a485f71
Rename calc and fix spin,charge
f76a580
tqdm progress bar
1b215ff
Add Criegee22 analysis files
b7308c6
Change benchamrk name
3e301e1
Add Criegee22 app
b9359fc
Remove unused energy conversion constant
69a0cfd
Remove mlipx
ElliottKasoar b040126
Add total to tqdm for calculation
ElliottKasoar 4bb3789
Tidy code
ElliottKasoar 44c9574
Save and display reactants and TS
ElliottKasoar bca50b3
fix visualiation of strucutures (hover data, structure mismatch) + vi…
joehart2001 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
150 changes: 150 additions & 0 deletions
150
ml_peg/analysis/molecular_reactions/Criegee22/analyse_Criegee22.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| """ | ||
| Analyse Criegee22 reaction barrier benchmark. | ||
|
|
||
| 10.1021/acs.jpca.8b09349 | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| from ase import units | ||
| from ase.io import read, write | ||
| import pytest | ||
|
|
||
| from ml_peg.analysis.utils.decorators import build_table, plot_parity | ||
| from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae | ||
| from ml_peg.app import APP_ROOT | ||
| from ml_peg.calcs import CALCS_ROOT | ||
| from ml_peg.models.get_models import load_models | ||
| from ml_peg.models.models import current_models | ||
|
|
||
| MODELS = load_models(current_models) | ||
| D3_MODEL_NAMES = build_d3_name_map(MODELS) | ||
|
|
||
| EV_TO_KCAL = units.mol / units.kcal | ||
| CALC_PATH = CALCS_ROOT / "molecular_reactions" / "Criegee22" / "outputs" | ||
| OUT_PATH = APP_ROOT / "data" / "molecular_reactions" / "Criegee22" | ||
|
|
||
| METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml") | ||
| DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config( | ||
| METRICS_CONFIG_PATH | ||
| ) | ||
|
|
||
|
|
||
| def labels() -> list: | ||
| """ | ||
| Get list of system names. | ||
|
|
||
| Returns | ||
| ------- | ||
| list | ||
| List of all system names. | ||
| """ | ||
| for model_name in MODELS: | ||
| labels_list = [ | ||
| path.stem for path in sorted((CALC_PATH / model_name).glob("*.xyz")) | ||
| ] | ||
| break | ||
| return labels_list | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| @plot_parity( | ||
| filename=OUT_PATH / "figure_criegee22_barriers.json", | ||
| title="Reaction barriers", | ||
| x_label="Predicted barrier / kcal/mol", | ||
| y_label="Reference barrier / kcal/mol", | ||
| hoverdata={ | ||
| "Labels": labels(), | ||
| }, | ||
| ) | ||
| def barrier_heights() -> dict[str, list]: | ||
| """ | ||
| Get barrier heights for all systems. | ||
|
|
||
| Returns | ||
| ------- | ||
| dict[str, list] | ||
| Dictionary of all reference and predicted barrier heights. | ||
| """ | ||
| results = {"ref": []} | {mlip: [] for mlip in MODELS} | ||
| ref_stored = False | ||
|
|
||
| for model_name in MODELS: | ||
| for label in labels(): | ||
| structs = read(CALC_PATH / model_name / f"{label}.xyz", index=":") | ||
|
|
||
| results[model_name].append( | ||
| (structs[1].info["model_energy"] - structs[0].info["model_energy"]) | ||
| * EV_TO_KCAL | ||
| ) | ||
| if not ref_stored: | ||
| results["ref"].append( | ||
| (structs[1].info["ref_energy"] - structs[0].info["ref_energy"]) | ||
| * EV_TO_KCAL | ||
| ) | ||
|
|
||
| # Write structures for app | ||
| structs_dir = OUT_PATH / model_name | ||
| structs_dir.mkdir(parents=True, exist_ok=True) | ||
| write(structs_dir / f"{label}_rct.xyz", structs) | ||
| ref_stored = True | ||
| return results | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| def get_mae(barrier_heights) -> dict[str, float]: | ||
| """ | ||
| Get mean absolute error for barrier heights. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| barrier_heights | ||
| Dictionary of reference and predicted barrier heights. | ||
|
|
||
| Returns | ||
| ------- | ||
| dict[str, float] | ||
| Dictionary of predicted barrier height errors for all models. | ||
| """ | ||
| results = {} | ||
| for model_name in MODELS: | ||
| results[model_name] = mae(barrier_heights["ref"], barrier_heights[model_name]) | ||
| return results | ||
|
|
||
|
|
||
| @pytest.fixture | ||
| @build_table( | ||
| filename=OUT_PATH / "criegee22_barriers_metrics_table.json", | ||
| metric_tooltips=DEFAULT_TOOLTIPS, | ||
| thresholds=DEFAULT_THRESHOLDS, | ||
| mlip_name_map=D3_MODEL_NAMES, | ||
| ) | ||
| def metrics(get_mae: dict[str, float]) -> dict[str, dict]: | ||
| """ | ||
| Get all metrics. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| get_mae | ||
| Mean absolute errors for all models. | ||
|
|
||
| Returns | ||
| ------- | ||
| dict[str, dict] | ||
| Metric names and values for all models. | ||
| """ | ||
| return {"MAE": get_mae} | ||
|
|
||
|
|
||
| def test_criegee22_barriers(metrics: dict[str, dict]) -> None: | ||
| """ | ||
| Run Criegee22 barriers test. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| metrics | ||
| All new benchmark metric names and dictionary of values for each model. | ||
| """ | ||
| return |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| metrics: | ||
| MAE: | ||
| good: 0.0 | ||
| bad: 20.0 | ||
| unit: kcal/mol | ||
| tooltip: Mean Absolute Error for all systems | ||
| level_of_theory: CCSDT(Q)/CBS |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,90 @@ | ||
| """Run Criegee22 app.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from dash import Dash | ||
| from dash.html import Div | ||
|
|
||
| from ml_peg.app import APP_ROOT | ||
| from ml_peg.app.base_app import BaseApp | ||
| from ml_peg.app.utils.build_callbacks import ( | ||
| plot_from_table_column, | ||
| struct_from_scatter, | ||
| ) | ||
| from ml_peg.app.utils.load import read_plot | ||
| from ml_peg.models.get_models import get_model_names | ||
| from ml_peg.models.models import current_models | ||
|
|
||
| MODELS = get_model_names(current_models) | ||
| BENCHMARK_NAME = "Criegee22" | ||
| DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular_reactions.html#Criegee22" | ||
| DATA_PATH = APP_ROOT / "data" / "molecular_reactions" / "Criegee22" | ||
|
|
||
|
|
||
| class Criegee22App(BaseApp): | ||
| """Criegee22 benchmark app layout and callbacks.""" | ||
|
|
||
| def register_callbacks(self) -> None: | ||
| """Register callbacks to app.""" | ||
| scatter = read_plot( | ||
| DATA_PATH / "figure_criegee22_barriers.json", | ||
| id=f"{BENCHMARK_NAME}-figure", | ||
| ) | ||
|
|
||
| model_dir = DATA_PATH / MODELS[0] | ||
| if model_dir.exists(): | ||
| base_labels = sorted( | ||
| [f.stem.rsplit("_", 1)[0] for f in model_dir.glob("*_rct.xyz")] | ||
| ) | ||
| structs = [ | ||
| f"assets/molecular_reactions/Criegee22/{MODELS[0]}/{label}_rct.xyz" | ||
| for label in base_labels | ||
| ] | ||
| else: | ||
| structs = [] | ||
|
|
||
| plot_from_table_column( | ||
| table_id=self.table_id, | ||
| plot_id=f"{BENCHMARK_NAME}-figure-placeholder", | ||
| column_to_plot={"MAE": scatter}, | ||
| ) | ||
|
|
||
| struct_from_scatter( | ||
| scatter_id=f"{BENCHMARK_NAME}-figure", | ||
| struct_id=f"{BENCHMARK_NAME}-struct-placeholder", | ||
| structs=structs, | ||
| mode="struct", | ||
| ) | ||
|
|
||
|
|
||
| def get_app() -> Criegee22App: | ||
| """ | ||
| Get Criegee22 benchmark app layout and callback registration. | ||
|
|
||
| Returns | ||
| ------- | ||
| Criegee22App | ||
| Benchmark layout and callback registration. | ||
| """ | ||
| return Criegee22App( | ||
| name=BENCHMARK_NAME, | ||
| description=( | ||
| "Performance in predicting barrier heights for the " | ||
| "Criegee22 reaction benchmark. " | ||
| "Reference data from CCSDT(Q) calculations." | ||
| ), | ||
| docs_url=DOCS_URL, | ||
| table_path=DATA_PATH / "criegee22_barriers_metrics_table.json", | ||
| extra_components=[ | ||
| Div(id=f"{BENCHMARK_NAME}-figure-placeholder"), | ||
| Div(id=f"{BENCHMARK_NAME}-struct-placeholder"), | ||
| ], | ||
| ) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent) | ||
| benchmark_app = get_app() | ||
| full_app.layout = benchmark_app.layout | ||
| benchmark_app.register_callbacks() | ||
| full_app.run(port=8068, debug=True) | ||
78 changes: 78 additions & 0 deletions
78
ml_peg/calcs/molecular_reactions/Criegee22/calc_Criegee22.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| """ | ||
| Calculate the Criegee22 dataset for reactions involving Crieegee intermediates. | ||
|
|
||
| C. D. Smith, A. Karton. J. Comput. Chem. 2020, 41, 328–339. | ||
| DOI: 10.1002/jcc.26106 | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| from pathlib import Path | ||
| from typing import Any | ||
|
|
||
| from ase import units | ||
| from ase.io import read, write | ||
| import pytest | ||
| from tqdm import tqdm | ||
|
|
||
| from ml_peg.calcs.utils.utils import download_s3_data | ||
| from ml_peg.models.get_models import load_models | ||
| from ml_peg.models.models import current_models | ||
|
|
||
| MODELS = load_models(current_models) | ||
|
|
||
| KJ_TO_EV = units.kJ / units.mol | ||
|
|
||
| OUT_PATH = Path(__file__).parent / "outputs" | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("mlip", MODELS.items()) | ||
| def test_criegee22(mlip: tuple[str, Any]) -> None: | ||
| """ | ||
| Run Criegee22 benchmark. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| mlip | ||
| Name of model use and model to get calculator. | ||
| """ | ||
| model_name, model = mlip | ||
| calc = model.get_calculator() | ||
|
|
||
| data_path = ( | ||
| download_s3_data( | ||
| filename="Criegee22.zip", | ||
| key="inputs/molecular_reactions/Criegee22/Criegee22.zip", | ||
| ) | ||
| / "Criegee22" | ||
| ) | ||
|
|
||
| # Read in data and attach calculator | ||
| calc = model.get_calculator() | ||
| # Add D3 calculator for this test | ||
| calc = model.add_d3_calculator(calc) | ||
|
|
||
| with open(data_path / "reference.txt") as lines: | ||
| # Skip header | ||
| next(lines) | ||
| for line in tqdm(lines, total=22): | ||
| items = line.strip().split() | ||
| label = items[0] | ||
| bh_ref = float(items[8]) * KJ_TO_EV | ||
| atoms_reac = read(data_path / "structures" / f"{label}-reac.xyz") | ||
| atoms_reac.calc = calc | ||
| atoms_reac.info["charge"] = 0 | ||
| atoms_reac.info["spin"] = 1 | ||
| atoms_reac.info["model_energy"] = atoms_reac.get_potential_energy() | ||
| atoms_reac.info["ref_energy"] = 0 | ||
|
|
||
| atoms_ts = read(data_path / "structures" / f"{label}-TS.xyz") | ||
| atoms_ts.calc = calc | ||
| atoms_ts.info["charge"] = 0 | ||
| atoms_ts.info["spin"] = 1 | ||
| atoms_ts.info["model_energy"] = atoms_ts.get_potential_energy() | ||
| atoms_ts.info["ref_energy"] = bh_ref | ||
|
|
||
| write_dir = OUT_PATH / model_name | ||
| write_dir.mkdir(parents=True, exist_ok=True) | ||
| write(write_dir / f"{label}.xyz", [atoms_reac, atoms_ts]) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.