diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst index 575bbcc11..db2f5d7d2 100644 --- a/docs/source/user_guide/benchmarks/bulk_crystal.rst +++ b/docs/source/user_guide/benchmarks/bulk_crystal.rst @@ -59,7 +59,6 @@ Reference data: Physics, 163(18). * PBE-D3(BJ) - Elasticity ========== @@ -95,13 +94,19 @@ Mean absolute error (MAE) between predicted and reference shear modulus (G) valu Calculated alongside (1), with the same exclusion criteria used in analysis. +(3) Elastic tensor MAE + +Element-wise mean absolute error of the 6x6 Voigt-form elasticity tensor. + +Symmetry-independent elastic constants are extracted based on crystal system: +triclinic, monoclinic, orthorhombic, tetragonal, trigonal, hexagonal, or cubic. +Symmetry checks are applied to components on the diagonal with a relative tolerance of 10%. If checks fail, a triclinic symmetry is assumed. Tensor components which are zero in both the reference and comparison tensors are excluded. Computational cost ------------------ High: tests are likely to take hours-days to run on GPU. - Data availability ----------------- diff --git a/ml_peg/analysis/bulk_crystal/elasticity/analyse_elasticity.py b/ml_peg/analysis/bulk_crystal/elasticity/analyse_elasticity.py index 6472f0717..408ed0b70 100644 --- a/ml_peg/analysis/bulk_crystal/elasticity/analyse_elasticity.py +++ b/ml_peg/analysis/bulk_crystal/elasticity/analyse_elasticity.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Any +import numpy as np import pandas as pd import pytest @@ -33,6 +34,221 @@ K_COLUMN = "K_vrh" G_COLUMN = "G_vrh" +E_TENSOR_COLUMN = "elastic_tensor" +SYMMETRY_COLUMN = "crystal_system" + +# Sources: +# Physical Properties of Crystals: An Introduction (pp 215) +# https://ocean-jh.github.io/elastic-mechanics/ + +VOIGT_SYMMETRIES = { + "triclinic": { + "indices": [ + (0, 0), + (1, 1), + (2, 2), + (0, 1), + (0, 2), + (1, 2), + (0, 3), + (0, 4), + (0, 5), + (1, 3), + (1, 4), + (1, 5), + (2, 3), + (2, 4), + (2, 5), + (3, 3), + (4, 4), + (5, 5), + (3, 4), + (3, 5), + (4, 5), + ], + "labels": [ + "C11", + "C22", + "C33", + "C12", + "C13", + "C23", + "C14", + "C15", + "C16", + "C24", + "C25", + "C26", + "C34", + "C35", + "C36", + "C44", + "C55", + "C66", + "C45", + "C46", + "C56", + ], + }, + "monoclinic": { + "indices": [ + (0, 0), + (1, 1), + (2, 2), + (0, 1), + (0, 2), + (1, 2), + (3, 3), + (4, 4), + (5, 5), + (0, 3), + (1, 3), + (2, 3), + (4, 5), + ], + "labels": [ + "C11", + "C22", + "C33", + "C12", + "C13", + "C23", + "C44", + "C55", + "C66", + "C14", + "C24", + "C34", + "C56", + ], + }, + "orthorhombic": { + "indices": [ + (0, 0), + (1, 1), + (2, 2), + (0, 1), + (0, 2), + (1, 2), + (3, 3), + (4, 4), + (5, 5), + ], + "labels": ["C11", "C22", "C33", "C12", "C13", "C23", "C44", "C55", "C66"], + }, + "tetragonal_7": { + "indices": [(0, 0), (0, 1), (0, 2), (2, 2), (3, 3), (5, 5), (0, 5)], + "labels": ["C11", "C12", "C13", "C33", "C44", "C66", "C16"], + }, + "tetragonal_6": { + "indices": [(0, 0), (0, 1), (0, 2), (2, 2), (3, 3), (5, 5)], + "labels": ["C11", "C12", "C13", "C33", "C44", "C66"], + }, + "trigonal_7": { + "indices": [(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (2, 2), (3, 3)], + "labels": ["C11", "C12", "C13", "C14", "C15", "C33", "C44"], + }, + "trigonal_6": { + "indices": [(0, 0), (0, 1), (0, 2), (0, 3), (2, 2), (3, 3)], + "labels": ["C11", "C12", "C13", "C14", "C33", "C44"], + }, + "hexagonal": { + "indices": [(0, 0), (0, 1), (0, 2), (2, 2), (3, 3)], + "labels": ["C11", "C12", "C13", "C33", "C44"], + }, + "cubic": { + "indices": [(0, 0), (0, 1), (3, 3)], + "labels": ["C11", "C12", "C44"], + }, +} + + +def get_independent_cs(c_ref, c_arr, crystal_symmetry, rtol=0.10): + """ + Extract symmetry-independent elastic constants. + + Parameters + ---------- + c_ref : (6, 6) array_like + Reference Voigt tensor. + c_arr : (6, 6) array_like + Comparison Voigt tensor. + crystal_symmetry : str + Crystal symmetry label. + rtol : float, optional + Relative tolerance for consistency checks (default: 0.10). + + Returns + ------- + tuple of ndarray + Independent elastic constants for reference and comparison tensors. + """ + + def _allclose(values): + """ + Check whether all values in an array are close to the first element. + + Parameters + ---------- + values : array_like + Sequence of numeric values to compare. The first value is taken + as the reference. + + Returns + ------- + bool + True if all values are close to the reference value within the + specified relative tolerance; False otherwise. + + Notes + ----- + If the reference value is zero, all elements must be exactly zero to + return True. + """ + values = np.asarray(values) + ref = values[0] + if ref == 0.0: + return np.all(values == 0.0) + return np.all(np.abs(values - ref) <= rtol * np.abs(ref)) + + pass_symm_check = True + for c in [c_ref, c_arr]: + if c.shape != (6, 6): + raise ValueError("C must be a 6x6 matrix") + + if crystal_symmetry == "cubic": + if not _allclose([c[0, 0], c[1, 1], c[2, 2]]): + pass_symm_check = False + if not _allclose([c[3, 3], c[4, 4], c[5, 5]]): + pass_symm_check = False + + elif crystal_symmetry in ( + "hexagonal", + "trigonal_6", + "trigonal_7", + "tetragonal_6", + "tetragonal_7", + ): + if not _allclose([c[0, 0], c[1, 1]]): + pass_symm_check = False + if not _allclose([c[3, 3], c[4, 4]]): + pass_symm_check = False + + elif crystal_symmetry in ("orthorhombic", "monoclinic", "triclinic"): + pass_symm_check = False + + if pass_symm_check: + voigt = VOIGT_SYMMETRIES[crystal_symmetry] + rows, cols = zip(*voigt["indices"], strict=False) + return c_ref[rows, cols], c_arr[rows, cols] + + voigt = VOIGT_SYMMETRIES["triclinic"] + rows, cols = zip(*voigt["indices"], strict=False) + vals_ref = c_ref[rows, cols] + vals_arr = c_arr[rows, cols] + tol = 1e-3 + mask = (np.abs(vals_arr) > tol) | (np.abs(vals_ref) > tol) + return vals_ref[mask], vals_arr[mask] def _filter_results(df: pd.DataFrame, model_name: str) -> tuple[pd.DataFrame, int]: @@ -41,15 +257,15 @@ def _filter_results(df: pd.DataFrame, model_name: str) -> tuple[pd.DataFrame, in Parameters ---------- - df + df : pd.DataFrame Dataframe containing raw benchmark results. - model_name - Model whose columns should be filtered. + model_name : str + Model whose columns should be filtered.. Returns ------- tuple[pd.DataFrame, int] - Filtered dataframe and number of excluded systems. + Filtered dataframe and number of excluded entries. """ mask_bulk = df[f"{K_COLUMN}_{model_name}"].between(-50, 600) mask_shear = df[f"{G_COLUMN}_{model_name}"].between(-50, 600) @@ -66,14 +282,13 @@ def elasticity_stats() -> dict[str, dict[str, Any]]: Returns ------- dict[str, dict[str, Any]] - Processed information per model (bulk, shear, exclusion counts). + Aggregated statistics including bulk, shear, elastic tensors, and exclusions. """ OUT_PATH.mkdir(parents=True, exist_ok=True) stats: dict[str, dict[str, Any]] = {} for model_name in MODELS: results_path = CALC_PATH / model_name / "moduli_results.csv" df = pd.read_csv(results_path) - filtered, excluded = _filter_results(df, model_name) stats[model_name] = { @@ -85,6 +300,14 @@ def elasticity_stats() -> dict[str, dict[str, Any]]: "ref": filtered[f"{G_COLUMN}_DFT"].tolist(), "pred": filtered[f"{G_COLUMN}_{model_name}"].tolist(), }, + "elastic_tensor": { + "ref": filtered[f"{E_TENSOR_COLUMN}_DFT"].tolist(), + "pred": filtered[f"{E_TENSOR_COLUMN}_{model_name}"].tolist(), + }, + "crystal_system": { + "ref": filtered[f"{SYMMETRY_COLUMN}_DFT"].tolist(), + "pred": filtered[f"{SYMMETRY_COLUMN}_{model_name}"].tolist(), + }, "excluded": excluded, } @@ -94,17 +317,17 @@ def elasticity_stats() -> dict[str, dict[str, Any]]: @pytest.fixture def bulk_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | None]: """ - Mean absolute error for bulk modulus predictions. + Compute MAE for bulk modulus predictions. Parameters ---------- - elasticity_stats - Aggregated bulk/shear data per model. + elasticity_stats : dict + Aggregated benchmark statistics. Returns ------- dict[str, float | None] - MAE values for each model (``None`` if no data). + Bulk modulus MAE per model. """ results: dict[str, float | None] = {} for model_name in MODELS: @@ -116,17 +339,17 @@ def bulk_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | N @pytest.fixture def shear_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | None]: """ - Mean absolute error for shear modulus predictions. + Compute MAE for shear modulus predictions. Parameters ---------- - elasticity_stats - Aggregated bulk/shear data per model. + elasticity_stats : dict + Aggregated benchmark statistics. Returns ------- dict[str, float | None] - MAE values for each model (``None`` if no data). + Shear modulus MAE per model. """ results: dict[str, float | None] = {} for model_name in MODELS: @@ -135,6 +358,64 @@ def shear_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | return results +@pytest.fixture +def elastic_tensor_mae( + elasticity_stats: dict[str, dict[str, Any]], +) -> dict[str, float | None]: + """ + Compute MAE for elastic tensor predictions. + + Parameters + ---------- + elasticity_stats : dict + Aggregated benchmark statistics. + + Returns + ------- + dict[str, float | None] + Elastic tensor MAE per model. + """ + + def _str_to_array(s: str) -> np.ndarray: + """ + Convert a string representation of a 6x6 elastic tensor into a NumPy array. + + Parameters + ---------- + s : str + String representation of a 6x6 elastic tensor. The string may + contain square brackets and whitespace-separated numbers. + + Returns + ------- + np.ndarray + A 6x6 NumPy array containing the numeric values from the input string. + """ + s_clean = s.replace("[", "").replace("]", "") + return np.fromstring(s_clean, sep=" ").reshape(6, 6) + + results: dict[str, float | None] = {} + for model_name in MODELS: + prop = elasticity_stats.get(model_name, {}).get("elastic_tensor") + crystal_system = elasticity_stats.get(model_name, {}).get("crystal_system") + if prop is None or not prop["ref"]: + results[model_name] = None + continue + + tensor_maes = [] + for r, p, cs in zip( + prop["ref"], prop["pred"], crystal_system["ref"], strict=False + ): + r_arr = np.asarray(r) if isinstance(r, np.ndarray) else _str_to_array(r) + p_arr = np.asarray(p) if isinstance(p, np.ndarray) else _str_to_array(p) + r_arr[np.abs(r_arr) < 0.01] = 0.0 + p_arr[np.abs(p_arr) < 0.01] = 0.0 + r_vals, p_vals = get_independent_cs(r_arr, p_arr, cs) + tensor_maes.append(mae(r_vals, p_vals)) + results[model_name] = float(np.mean(tensor_maes)) + return results + + @pytest.fixture @plot_density_scatter( filename=OUT_PATH / "figure_bulk_density.json", @@ -144,17 +425,17 @@ def shear_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | ) def bulk_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict]: """ - Density scatter inputs for bulk modulus. + Prepare density scatter data for bulk modulus. Parameters ---------- - elasticity_stats - Aggregated bulk/shear data per model. + elasticity_stats : dict + Aggregated benchmark statistics. Returns ------- dict[str, dict] - Mapping of model name to density-scatter data. + Density-scatter data per model. """ return build_density_inputs(MODELS, elasticity_stats, "bulk", metric_fn=mae) @@ -168,17 +449,17 @@ def bulk_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict] ) def shear_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict]: """ - Density scatter inputs for shear modulus. + Prepare density scatter data for shear modulus. Parameters ---------- - elasticity_stats - Aggregated bulk/shear data per model. + elasticity_stats : dict + Aggregated benchmark statistics. Returns ------- dict[str, dict] - Mapping of model name to density-scatter data. + Density-scatter data per model. """ return build_density_inputs(MODELS, elasticity_stats, "shear", metric_fn=mae) @@ -193,25 +474,29 @@ def shear_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict def metrics( bulk_mae: dict[str, float | None], shear_mae: dict[str, float | None], + elastic_tensor_mae: dict[str, float | None], ) -> dict[str, dict]: """ - All elasticity metrics. + Aggregate all elasticity metrics. Parameters ---------- - bulk_mae + bulk_mae : dict Bulk modulus MAE per model. - shear_mae + shear_mae : dict Shear modulus MAE per model. + elastic_tensor_mae : dict + Elastic tensor MAE per model. Returns ------- dict[str, dict] - Mapping of metric name to model-value dictionaries. + Dictionary mapping metric names to model values. """ return { "Bulk modulus MAE": bulk_mae, "Shear modulus MAE": shear_mae, + "Elasticity tensor MAE": elastic_tensor_mae, } @@ -225,11 +510,11 @@ def test_elasticity( Parameters ---------- - metrics + metrics : dict Benchmark metric values. - bulk_density - Density scatter inputs for bulk modulus. - shear_density - Density scatter inputs for shear modulus. + bulk_density : dict + Density scatter data for bulk modulus. + shear_density : dict + Density scatter data for shear modulus. """ return diff --git a/ml_peg/analysis/bulk_crystal/elasticity/metrics.yml b/ml_peg/analysis/bulk_crystal/elasticity/metrics.yml index 92101110e..0e28d941d 100644 --- a/ml_peg/analysis/bulk_crystal/elasticity/metrics.yml +++ b/ml_peg/analysis/bulk_crystal/elasticity/metrics.yml @@ -11,3 +11,9 @@ metrics: unit: GPa tooltip: Mean absolute error of VRH shear modulus (lower is better). Excludes systems with shear moduli < -50 GPa and > 500 GPa. level_of_theory: PBE + Elasticity tensor MAE: + good: 5.0 + bad: 50.0 + unit: GPa + tooltip: Mean absolute error of all nonzero elements of the full elasticity tensor (lower is better). Computed per-tensor and then averaged across systems. + level_of_theory: PBE diff --git a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py index bfb14ae25..4eff795ac 100644 --- a/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py +++ b/ml_peg/calcs/bulk_crystal/elasticity/calc_elasticity.py @@ -5,7 +5,13 @@ from pathlib import Path from typing import Any -from matcalc.benchmark import ElasticityBenchmark +from ase.calculators.calculator import Calculator +from matcalc._base import PropCalc +from matcalc._elasticity import ElasticityCalc +from matcalc.benchmark import Benchmark +from matcalc.units import eVA3ToGPa +import numpy as np +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer import pytest from ml_peg.models.get_models import load_models @@ -15,6 +21,144 @@ OUT_PATH = Path(__file__).parent / "outputs" +def get_crystal_system(struct): + """ + Determine a crystal-system label. + + Refine the crystal-system label returned by pymatgen using + the crystallographic point group to distinguish symmetry + subclasses relevant for elastic constants. + + Parameters + ---------- + struct : pymatgen.core.structure.Structure + Structure to analyse. + + Returns + ------- + str + Crystal-system label. + """ + sga = SpacegroupAnalyzer(struct) + crystal_system = sga.get_crystal_system().lower() + point_group = sga.get_point_group_symbol() + if crystal_system == "tetragonal": + return "tetragonal_7" if point_group in {"4", "-4", "4/m"} else "tetragonal_6" + + if crystal_system in {"trigonal", "rhombohedral"}: + return "trigonal_7" if point_group in {"3", "-3"} else "trigonal_6" + + return crystal_system + + +class CustomElasticityBenchmark(Benchmark): # noqa: PR01 + """ + Extend the matcalc Benchmark to output full elastic tensors. + + This allows extraction and comparison of elasticity tensors + for materials. + + Parameters + ---------- + index_name : str, optional + Name of the index used to identify benchmark records (default is "mp_id"). + benchmark_name : str or Path, optional + Name or path of the benchmark dataset, + default is "mp-binary-pbe-elasticity-2025.1.json.gz". + **kwargs : dict, optional + Additional keyword arguments forwarded to the parent class. + """ + + def __init__( + self, + index_name: str = "mp_id", + benchmark_name: str | Path = "mp-binary-pbe-elasticity-2025.1.json.gz", + **kwargs, + ) -> None: + """ + Initialize the elasticity benchmark. + + Parameters + ---------- + index_name : str, optional + Name of the index used to identify benchmark records (default is "mp_id"). + benchmark_name : str or Path, optional + Name or path of the benchmark dataset, + default is "mp-binary-pbe-elasticity-2025.1.json.gz". + **kwargs : dict, optional + Additional keyword arguments forwarded to the parent class. + """ + kwargs.setdefault( + "properties", ("bulk_modulus_vrh", "shear_modulus_vrh", "elasticity_tensor") + ) + kwargs.setdefault( + "property_rename_map", {"bulk_modulus": "K", "shear_modulus": "G"} + ) + kwargs.setdefault("other_fields", ("formula",)) + super().__init__( + benchmark_name, + index_name=index_name, + **kwargs, + ) + for struct, row in zip(self.structures, self.ground_truth, strict=False): + row["crystal_system_DFT"] = get_crystal_system(struct) + + def get_prop_calc(self, calculator: str | Calculator, **kwargs: Any) -> PropCalc: + """ + Create a property calculation object. + + Parameters + ---------- + calculator : str or Calculator + Calculator used to evaluate elastic properties. + **kwargs : dict + Additional configuration options. + + Returns + ------- + PropCalc + Configured property calculation object. + """ + kwargs.setdefault("fmax", 0.05) + return ElasticityCalc(calculator, **kwargs) + + def process_result(self, result: dict | None, model_name: str) -> dict: # noqa: SS05 + """ + Extract data from a benchmark result. + + Parameters + ---------- + result : dict or None + Result dictionary containing elastic properties. + model_name : str + Name of the model used to label output keys. + + Returns + ------- + dict + Dictionary of processed elastic properties. + """ + key = get_crystal_system(result["final_structure"]) + return { + f"bulk_modulus_vrh_{model_name}": ( + result["bulk_modulus_vrh"] * eVA3ToGPa + if result is not None + else float("nan") + ), + f"shear_modulus_vrh_{model_name}": ( + result["shear_modulus_vrh"] * eVA3ToGPa + if result is not None + else float("nan") + ), + f"elastic_tensor_{model_name}": ( + result["elastic_tensor"] * eVA3ToGPa + if result is not None + else float("nan") + ), + f"crystal_system_{model_name}": key, + } + + def run_elasticity_benchmark( *, calc, @@ -30,34 +174,34 @@ def run_elasticity_benchmark( fmax: float = 0.05, ) -> None: """ - Run elasticity benchmark and write results to CSV. + Run the elasticity benchmark and write results to CSV. Parameters ---------- calc ASE calculator for evaluating structures. - model_name + model_name : str Name of MLIP model. - out_dir + out_dir : Path Directory to write per-model outputs. - n_jobs + n_jobs : int Number of parallel workers for the benchmark. - norm_strains - Tuple of normal strains to apply. - shear_strains - Tuple of shear strains to apply. - relax_structure - Whether to relax the equilibrium structure before deformations. - relax_deformed_structures - Whether to relax each strained structure. - use_checkpoint - If True, writes intermediate checkpoints inside ``out_dir/checkpoints``. - n_materials - Number of materials sampled from the benchmark set. If None, use all materials. - fmax + norm_strains : tuple of float + Normal strains to apply. + shear_strains : tuple of float + Shear strains to apply. + relax_structure : bool + Relax the equilibrium structure before deformations. + relax_deformed_structures : bool + Relax each strained structure. + use_checkpoint : bool + Whether to write intermediate checkpoints. + n_materials : int or None + Number of materials to sample. None means all materials. + fmax : float Force threshold for structural relaxations. """ - benchmark = ElasticityBenchmark( + benchmark = CustomElasticityBenchmark( n_samples=n_materials, seed=2025, fmax=fmax, @@ -66,6 +210,7 @@ def run_elasticity_benchmark( norm_strains=norm_strains, shear_strains=shear_strains, benchmark_name="mp-pbe-elasticity-2025.3.json.gz", + properties=("bulk_modulus_vrh", "shear_modulus_vrh", "elastic_tensor"), ) out_dir.mkdir(parents=True, exist_ok=True) @@ -84,6 +229,16 @@ def run_elasticity_benchmark( delete_checkpoint_on_finish=False, ) + results["elastic_tensor_DFT"] = results["elastic_tensor_DFT"].apply( + lambda x: np.array(x["raw"]) if x is not None else None + ) + + for col in results.columns: + if col.startswith("elastic_tensor_") and col != "elastic_tensor_DFT": + results[col] = results[col].apply( + lambda x: x.voigt if x is not None else None + ) + results.to_csv(out_dir / "moduli_results.csv", index=False) @@ -91,11 +246,11 @@ def run_elasticity_benchmark( @pytest.mark.parametrize("mlip", MODELS.items()) def test_elasticity(mlip: tuple[str, Any]) -> None: """ - Run elasticity benchmark for a single model. + Run the elasticity benchmark for a single model. Parameters ---------- - mlip + mlip : tuple Model entry containing name and object capable of providing a calculator. """ model_name, model = mlip @@ -104,4 +259,7 @@ def test_elasticity(mlip: tuple[str, Any]) -> None: calc=calc, model_name=model_name, out_dir=OUT_PATH / model_name, + n_materials=100, + relax_structure=False, + relax_deformed_structures=False, )