Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
*.svg
*.xz
*.txt
*.csv
~*
*~
.project
Expand Down
64 changes: 61 additions & 3 deletions docs/source/user_guide/benchmarks/bulk_crystal.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
============
Bulk Crystal
============
=============
Bulk Crystals
=============

Lattice constants
=================
Expand Down Expand Up @@ -39,6 +39,7 @@ Computational cost

Low: tests are likely to take less than a minute to run on CPU.


Data availability
-----------------

Expand All @@ -57,3 +58,60 @@ Reference data:
foundation model for atomistic materials chemistry. The Journal of Chemical
Physics, 163(18).
* PBE-D3(BJ)


Elastic Moduli
==============

Summary
-------

Bulk and shear moduli calculated for 12122 bulk crystals from the Materials Project.


Metrics
-------

(1) Bulk modulus MAE

Mean absolute error (MAE) between predicted and reference bulk modulus (B) values.

MatCalc's ElasticityCalc is used to deform the structures with normal (diagonal) strain
magnitudes of ±0.01 and ±0.005 for ϵ11, ϵ22, ϵ33, and off-diagonal strain magnitudes of
±0.06 and ±0.03 for ϵ23, ϵ13, ϵ12. The Voigt-Reuss-Hill (VRH) average is used to obtain
the bulk and shear moduli from the stress tensor. Both the initial and deformed
structures are relaxed with MatCalc's default ElasticityCalc settings. For more information, see
`MatCalc's ElasticityCalc documentation
<https://github.com/materialsvirtuallab/matcalc/blob/main/src/matcalc/_elasticity.py>`_.

Analysis excludes:

* Structures with B ≤ 0 or B > 500 GPa, or with G ≤ 0 or G > 500 GPa.
* H2, N2, O2, F2, Cl2, He, Xe, Ne, Kr, Ar.
* Materials with density < 0.5 g/cm³ (less dense than Li, the lowest-density solid element).

(2) Shear modulus MAE

Mean absolute error (MAE) between predicted and reference shear modulus (G) values.

Calculated alongside (1), with the same exclusion criteria used in analysis.


Computational cost
------------------

High: tests are likely to take hours to days to run on GPU.


Data availability
-----------------

Input structures:

* 1. De Jong, M. et al. Charting the complete elastic properties of
inorganic crystalline compounds. Sci Data 2, 150009 (2015).
* Dataset release: mp-pbe-elasticity-2025.3.json.gz from the Materials Project database.

Reference data:

* Same as input data
* PBE
235 changes: 235 additions & 0 deletions ml_peg/analysis/bulk_crystal/elasticity/analyse_elasticity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
"""Analyse elasticity benchmark."""

from __future__ import annotations

from pathlib import Path
from typing import Any

import pandas as pd
import pytest

from ml_peg.analysis.utils.decorators import (
build_table,
plot_density_scatter,
)
from ml_peg.analysis.utils.utils import (
build_density_inputs,
load_metrics_config,
mae,
)
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import get_model_names
from ml_peg.models.models import current_models

# Model names obtained from ``current_models``; every fixture below loops over these.
MODELS = get_model_names(current_models)
# Directory holding the calculation outputs; each model's results are read from
# ``CALC_PATH / <model_name> / "moduli_results.csv"``.
CALC_PATH = CALCS_ROOT / "bulk_crystal" / "elasticity" / "outputs"
# Directory used for the figure/table JSON filenames passed to the decorators below;
# it is created on demand by the ``elasticity_stats`` fixture.
OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "elasticity"

# Thresholds, tooltips and weights come from the ``metrics.yml`` next to this file.
METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
    METRICS_CONFIG_PATH
)

# Column-name prefixes in the results CSV. The reference column is ``<prefix>_DFT``
# and the prediction column is ``<prefix>_<model_name>``.
K_COLUMN = "K_vrh"
G_COLUMN = "G_vrh"


def _filter_results(
    df: pd.DataFrame,
    model_name: str,
    *,
    lower: float = -50,
    upper: float = 600,
) -> tuple[pd.DataFrame, int]:
    """
    Filter outlier predictions and return remaining data with exclusion count.

    A row is kept only when both the predicted bulk modulus and the predicted
    shear modulus of ``model_name`` lie within ``[lower, upper]`` (bounds
    inclusive). Rows with a missing (NaN) prediction never satisfy the range
    check and are therefore counted as excluded.

    Parameters
    ----------
    df
        Dataframe containing raw benchmark results.
    model_name
        Model whose columns should be filtered.
    lower
        Smallest predicted modulus that is still accepted. Default is -50.
    upper
        Largest predicted modulus that is still accepted. Default is 600.

    Returns
    -------
    tuple[pd.DataFrame, int]
        Filtered dataframe (an independent copy) and number of excluded systems.
    """
    # NOTE(review): the tooltips in metrics.yml quote an upper cut-off of 500 GPa,
    # whereas the default used here is 600 - confirm which value is intended.
    within_range = df[f"{K_COLUMN}_{model_name}"].between(lower, upper) & df[
        f"{G_COLUMN}_{model_name}"
    ].between(lower, upper)

    # Copy so that callers can modify the result without touching ``df``.
    valid = df[within_range].copy()
    return valid, len(df) - len(valid)


@pytest.fixture
def elasticity_stats() -> dict[str, dict[str, Any]]:
    """
    Collect filtered reference and predicted moduli for every model.

    The output directory is created first. For each model, the results CSV is
    read, outliers are removed with ``_filter_results`` and the remaining
    reference/predicted values are stored as plain lists.

    Returns
    -------
    dict[str, dict[str, Any]]
        Mapping of model name to ``{"bulk": ..., "shear": ..., "excluded": ...}``,
        where ``bulk`` and ``shear`` each hold ``ref`` and ``pred`` lists and
        ``excluded`` is the number of rows dropped by the filter.
    """
    OUT_PATH.mkdir(parents=True, exist_ok=True)

    # (key in the output, column prefix in the CSV)
    properties = (("bulk", K_COLUMN), ("shear", G_COLUMN))

    per_model: dict[str, dict[str, Any]] = {}
    for name in MODELS:
        raw = pd.read_csv(CALC_PATH / name / "moduli_results.csv")
        kept, n_dropped = _filter_results(raw, name)

        entry: dict[str, Any] = {
            label: {
                "ref": kept[f"{column}_DFT"].tolist(),
                "pred": kept[f"{column}_{name}"].tolist(),
            }
            for label, column in properties
        }
        entry["excluded"] = n_dropped
        per_model[name] = entry

    return per_model


@pytest.fixture
def bulk_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | None]:
    """
    Mean absolute error for bulk modulus predictions.

    Parameters
    ----------
    elasticity_stats
        Aggregated bulk/shear data per model.

    Returns
    -------
    dict[str, float | None]
        MAE values for each model (``None`` if no data).
    """
    results: dict[str, float | None] = {}
    for model_name in MODELS:
        prop = elasticity_stats.get(model_name, {}).get("bulk")
        # ``prop`` is None when the model (or its "bulk" entry) is absent, and the
        # lists are empty when every system was filtered out. Report ``None`` in
        # both cases, as documented, instead of subscripting None or passing
        # empty data to ``mae``.
        if not prop or not prop["ref"]:
            results[model_name] = None
        else:
            results[model_name] = mae(prop["ref"], prop["pred"])
    return results


@pytest.fixture
def shear_mae(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, float | None]:
    """
    Mean absolute error for shear modulus predictions.

    Parameters
    ----------
    elasticity_stats
        Aggregated bulk/shear data per model.

    Returns
    -------
    dict[str, float | None]
        MAE values for each model (``None`` if no data).
    """
    results: dict[str, float | None] = {}
    for model_name in MODELS:
        prop = elasticity_stats.get(model_name, {}).get("shear")
        # ``prop`` is None when the model (or its "shear" entry) is absent, and the
        # lists are empty when every system was filtered out. Report ``None`` in
        # both cases, as documented, instead of subscripting None or passing
        # empty data to ``mae``.
        if not prop or not prop["ref"]:
            results[model_name] = None
        else:
            results[model_name] = mae(prop["ref"], prop["pred"])
    return results


@pytest.fixture
@plot_density_scatter(
    filename=OUT_PATH / "figure_bulk_density.json",
    title="Bulk modulus density plot",
    x_label="Reference bulk modulus / GPa",
    y_label="Predicted bulk modulus / GPa",
)
def bulk_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict]:
    """
    Build the density-scatter inputs for the bulk modulus.

    Parameters
    ----------
    elasticity_stats
        Per-model reference and predicted moduli.

    Returns
    -------
    dict[str, dict]
        Density-scatter data keyed by model name.
    """
    # Select the "bulk" entry of every model; ``mae`` is the metric handed to the helper.
    density_inputs = build_density_inputs(
        MODELS, elasticity_stats, "bulk", metric_fn=mae
    )
    return density_inputs


@pytest.fixture
@plot_density_scatter(
    filename=OUT_PATH / "figure_shear_density.json",
    title="Shear modulus density plot",
    x_label="Reference shear modulus / GPa",
    y_label="Predicted shear modulus / GPa",
)
def shear_density(elasticity_stats: dict[str, dict[str, Any]]) -> dict[str, dict]:
    """
    Build the density-scatter inputs for the shear modulus.

    Parameters
    ----------
    elasticity_stats
        Per-model reference and predicted moduli.

    Returns
    -------
    dict[str, dict]
        Density-scatter data keyed by model name.
    """
    # Select the "shear" entry of every model; ``mae`` is the metric handed to the helper.
    density_inputs = build_density_inputs(
        MODELS, elasticity_stats, "shear", metric_fn=mae
    )
    return density_inputs


@pytest.fixture
@build_table(
    filename=OUT_PATH / "elasticity_metrics_table.json",
    metric_tooltips=DEFAULT_TOOLTIPS,
    thresholds=DEFAULT_THRESHOLDS,
    weights=DEFAULT_WEIGHTS,
)
def metrics(
    bulk_mae: dict[str, float | None],
    shear_mae: dict[str, float | None],
) -> dict[str, dict]:
    """
    Gather every elasticity metric under its display name.

    Parameters
    ----------
    bulk_mae
        Per-model MAE of the bulk modulus.
    shear_mae
        Per-model MAE of the shear modulus.

    Returns
    -------
    dict[str, dict]
        Metric name mapped to its per-model values.
    """
    # Keys are the metric names that also appear in metrics.yml.
    table: dict[str, dict] = {}
    table["Bulk modulus MAE"] = bulk_mae
    table["Shear modulus MAE"] = shear_mae
    return table


def test_elasticity(
    metrics: dict[str, dict],
    bulk_density: dict[str, dict],
    shear_density: dict[str, dict],
) -> None:
    """
    Run elasticity analysis.

    The body is intentionally empty: requesting the fixtures is what evaluates
    them, so all the work happens during fixture setup.

    Parameters
    ----------
    metrics
        Benchmark metric values.
    bulk_density
        Density scatter inputs for bulk modulus.
    shear_density
        Density scatter inputs for shear modulus.
    """
11 changes: 11 additions & 0 deletions ml_peg/analysis/bulk_crystal/elasticity/metrics.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
metrics:
Bulk modulus MAE:
good: 2.0
bad: 30.0
unit: GPa
tooltip: Mean absolute error of VRH bulk modulus (lower is better). Excludes systems with bulk moduli < -50 GPa and > 500 GPa.
Shear modulus MAE:
good: 2.0
bad: 30.0
unit: GPa
tooltip: Mean absolute error of VRH shear modulus (lower is better). Excludes systems with shear moduli < -50 GPa and > 500 GPa.
Loading