# Metadata, training details and benchmark metrics for the MatterSim v1 5M
# model. One key per line, 2-space block indentation.

model_name: MatterSim v1 5M
model_key: mattersim-v1-5M
model_version: v1.0.0
# Dates are quoted so they load as strings rather than date objects.
date_added: '2024-12-16'
date_published: '2024-05-08'

authors:
  - name: Han Yang
    affiliation: Microsoft Research AI for Science
    email: hanyang@microsoft.com
    orcid: https://orcid.org/0000-0002-4531-093X
  - name: Chenxi Hu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0006-8486-9230
  - name: Yichi Zhou
    affiliation: Microsoft Research AI for Science
  - name: Xixian Liu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0008-9215-3990
  - name: Yu Shi
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-9235-8963
  - name: Jielan Li
    affiliation: Microsoft Research AI for Science
    email: jielanli@microsoft.com
  - name: Guanzhi Li
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-4167-6432
  - name: Zekun Chen
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-4183-2941
  - name: Shuizhou Chen
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0005-2701-5565
  - name: Claudio Zeni
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-6334-2679
  - name: Matthew Horton
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-7777-8871
  - name: Robert Pinsler
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-1454-188X
  - name: Andrew Fowler
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-7360-3078
  - name: Daniel Zügner
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-1626-5065
  - name: Tian Xie
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-0987-4666
  - name: Jake Smith
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0003-0412-1312
  - name: Lixin Sun
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0002-7971-5222
  - name: Qian Wang
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0007-7680-4514
  - name: Lingyu Kong
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0009-0006-2226-5730
  - name: Chang Liu
    affiliation: Microsoft Research AI for Science
    orcid: https://orcid.org/0000-0001-5207-5440
  - name: Hongxia Hao
    affiliation: Microsoft Research AI for Science
    email: hongxiahao@microsoft.com
    orcid: https://orcid.org/0000-0002-4382-200X
  - name: Ziheng Lu
    affiliation: Microsoft Research AI for Science
    email: zihenglu@microsoft.com
    orcid: https://orcid.org/0000-0003-2239-8526

repo: https://github.com/microsoft/mattersim
doi: https://doi.org/10.48550/arXiv.2405.04967
paper: https://arxiv.org/abs/2405.04967
pr_url: https://github.com/janosh/matbench-discovery/pull/178
checkpoint_url: https://github.com/microsoft/mattersim/raw/refs/heads/main/pretrained_models/mattersim-v1.0.0-5M.pth

license:
  code: MIT
  code_url: https://github.com/microsoft/mattersim/blob/d0a52e64fc/LICENSE.txt
  checkpoint: MIT
  checkpoint_url: https://github.com/microsoft/mattersim/blob/d0a52e64fc/MODEL_CARD.md

requirements:
  # NOTE(review): notes.Description says this model is M3GNet-based; confirm
  # that the Graphormer entry still applies to this checkpoint.
  Graphormer: v1
  numpy: 1.26.2
  pymatgen: 2024.5.1
  # NOTE(review): placed under requirements based on key order; confirm.
  python: '>=3.9'

openness: OSCD
trained_for_benchmark: false
train_task: S2EFS
test_task: IS2RE-SR
targets: EFS_G
model_type: UIP
model_params: 4_549_142
n_estimators: 1

training_set: [MatterSim]
training_cost: missing

hyperparams:
  optimizer: AdamW
  ase_optimizer: FIRE
  cell_filter: ExpCellFilter
  max_steps: 500
  max_force: 0.02 # eV/Å
  learning_rate: 0.0005 # initial value, linearly decayed to 0
  seed: 42
  batch_size: 128
  units: 256
  graph_construction_radius: 5.0 # Å
  max_neighbors: .inf

notes:
  # Folded scalars (>-) load as single-line strings with no trailing newline.
  Description: >-
    This is an open source version of MatterSim V1 based on M3GNet
    architecture.
  Training: >-
    MatterSim was trained on a large, closed dataset covering diverse
    combinations of 89 elements across many temperatures and pressures.
  # Literal scalars (|) keep one list item per line.
  Tested Applications: |
    - Energy, force, stress prediction
    - Molecular dynamics simulations
    - Phonons
    - Mechanical properties
    - Free energy and phase diagrams
    - Materials discovery
  Training Data Sources: |
    - Materials Project
    - Alexandria dataset
    - newly generated structures and MD trajectories

metrics:
  phonons:
    kappa_103: # https://github.com/MPA2suite/k_SRME/pull/13
      κ_SRME: 0.5745 # dimensionless
      pred_file: models/mattersim/mattersim-v1-5M/2024-12-09-kappa-103-FIRE-dist=0.01-fmax=1e-4-symprec=1e-5.json.gz
      pred_file_url: https://figshare.com/files/52134884
      κ_SRE: 0.4126

  geo_opt:
    pred_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt.jsonl.gz
    pred_file_url: https://figshare.com/files/57753373
    struct_col: mattersim_5M_structure
    symprec=1e-2:
      rmsd: 0.0733 # unitless
      n_sym_ops_mae: 1.7451 # unitless
      symmetry_decrease: 0.0524 # fraction
      symmetry_match: 0.8148 # fraction
      symmetry_increase: 0.1254 # fraction
      n_structures: 256963 # count
      analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt-symprec=1e-2-moyo=0.4.2.csv.gz
      analysis_file_url: https://figshare.com/files/53504675
    symprec=1e-5:
      rmsd: 0.0733 # unitless
      n_sym_ops_mae: 2.1408 # unitless
      symmetry_decrease: 0.0755 # fraction
      symmetry_match: 0.6874 # fraction
      symmetry_increase: 0.2305 # fraction
      n_structures: 256963 # count
      analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt-symprec=1e-5-moyo=0.4.2.csv.gz
      analysis_file_url: https://figshare.com/files/53504678

  discovery:
    pred_file: models/mattersim/mattersim-v1-5M/2024-12-16-wbm-IS2RE.csv.gz
    # This M3GNet-based model replaced the original Graphormer-based MatterSim
    # on the leaderboard; the earlier submission's prediction file was:
    # pred_file: models/mattersim/2024-06-16-mattersim-graphormer-wbm-IS2RE.csv.gz
    pred_file_url: https://figshare.com/files/52057559
    pred_col: e_form_per_atom_mattersim
    full_test_set:
      F1: 0.838 # fraction
      DAF: 5.123 # dimensionless
      Precision: 0.879 # fraction
      Recall: 0.801 # fraction
      Accuracy: 0.947 # fraction
      TPR: 0.801 # fraction
      FPR: 0.023 # fraction
      TNR: 0.977 # fraction
      FNR: 0.199 # fraction
      TP: 35309.0 # count
      FP: 4858.0 # count
      TN: 208013.0 # count
      FN: 8783.0 # count
      MAE: 0.024 # eV/atom
      RMSE: 0.069 # eV/atom
      R2: 0.854 # dimensionless
      missing_preds: 2 # count
    most_stable_10k:
      F1: 0.984 # fraction
      DAF: 6.339 # dimensionless
      Precision: 0.969 # fraction
      Recall: 1.0 # fraction
      Accuracy: 0.969 # fraction
      TPR: 1.0 # fraction
      FPR: 1.0 # fraction
      TNR: 0.0 # fraction
      FNR: 0.0 # fraction
      TP: 9691.0 # count
      FP: 309.0 # count
      TN: 0.0 # count
      FN: 0.0 # count
      MAE: 0.022 # eV/atom
      RMSE: 0.078 # eV/atom
      R2: 0.869 # dimensionless
      missing_preds: 0 # count
    unique_prototypes:
      F1: 0.862 # fraction
      DAF: 5.852 # dimensionless
      Precision: 0.895 # fraction
      Recall: 0.831 # fraction
      Accuracy: 0.959 # fraction
      TPR: 0.831 # fraction
      FPR: 0.018 # fraction
      TNR: 0.982 # fraction
      FNR: 0.169 # fraction
      TP: 27750.0 # count
      FP: 3268.0 # count
      TN: 178846.0 # count
      FN: 5624.0 # count
      MAE: 0.024 # eV/atom
      RMSE: 0.068 # eV/atom
      R2: 0.863 # dimensionless
      missing_preds: 0 # count

  diatomics:
    pred_file: models/mattersim/mattersim-v1-5M/2025-02-19-diatomics.json.gz
    pred_file_url: https://figshare.com/files/52468178