Merge pull request #39 from ml-evs/modnet_benchmark

Add MODNet (v0.1.10) benchmarks
materialsproject · Oct 18, 2021 · a6b3fcc · a6b3fcc
2 parents 830464d + cc36d63
commit a6b3fcc
Show file tree

Hide file tree

Showing 4 changed files with 456 additions and 0 deletions.
diff --git a/benchmarks/matbench_v0.1_modnet_v0.1.10/benchmarks.ipynb b/benchmarks/matbench_v0.1_modnet_v0.1.10/benchmarks.ipynb
@@ -0,0 +1,305 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# MODNet (v0.1.10)\n",
+    "\n",
+    "For now, this benchmark file simply loads our existing full benchmark results (~100 MB) from [ml-evs/modnet-matbench](https://github.com/ml-evs/modnet-matbench) and exports them in the matbench format. Code for featurisation, hyperparameter optimisation and the final predictions themselves can be found in the aforementioned repository or in the illustrative run.py file."
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "source": [
+    "def download_and_extract(url, fname):\n",
+    "    \"\"\"Download the zip archive at `url` to `fname` and extract it into the current directory.\n",
+    "\n",
+    "    If `fname` already exists, nothing is downloaded or extracted.\n",
+    "    \"\"\"\n",
+    "    # A bare `import urllib` does not load the `request` submodule, so import it explicitly.\n",
+    "    import urllib.request\n",
+    "    from zipfile import ZipFile\n",
+    "    import os\n",
+    "\n",
+    "    if os.path.exists(fname):\n",
+    "        print(f\"File {fname} already found, will not redownload.\")\n",
+    "        return\n",
+    "\n",
+    "    urllib.request.urlretrieve(url, fname)\n",
+    "    with ZipFile(fname, \"r\") as _zip:\n",
+    "        _zip.extractall(\".\")\n",
+    "\n",
+    "\n",
+    "repo = \"ml-evs/modnet-matbench\"\n",
+    "version = \"main\"\n",
+    "fname = f'modnet-matbench-{version.replace(\"#\", \"-\").replace(\"/\", \"-\")}'\n",
+    "\n",
+    "if version.startswith(\"v\"):\n",
+    "    url = f\"https://github.com/{repo}/archive/refs/tags/{version}.zip\"\n",
+    "else:\n",
+    "    url = f'https://github.com/{repo}/archive/refs/heads/{version.replace(\"#\", \"%23\")}.zip'\n",
+    "\n",
+    "\n",
+    "download_and_extract(url, fname + \".zip\")"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "source": [
+    "import pickle\n",
+    "from matbench.bench import MatbenchBenchmark\n",
+    "from matbench.constants import CLF_KEY\n",
+    "\n",
+    "mb = MatbenchBenchmark(\n",
+    "    autoload=False,\n",
+    "    subset=[\n",
+    "        \"matbench_dielectric\",\n",
+    "        \"matbench_jdft2d\",\n",
+    "        \"matbench_steels\",\n",
+    "        \"matbench_expt_gap\",\n",
+    "        \"matbench_phonons\",\n",
+    "        \"matbench_log_gvrh\",\n",
+    "        \"matbench_log_kvrh\",\n",
+    "        \"matbench_glass\",\n",
+    "        \"matbench_expt_is_metal\",\n",
+    "        \"matbench_perovskites\",\n",
+    "        \"matbench_mp_gap\",\n",
+    "        \"matbench_mp_is_metal\",\n",
+    "        \"matbench_mp_e_form\",\n",
+    "    ],\n",
+    ")\n",
+    "\n",
+    "results_locs = {\n",
+    "    task.dataset_name: f\"{fname}/{task.dataset_name}/results/{task.dataset_name}_results.pkl\"\n",
+    "    for task in mb.tasks\n",
+    "}\n",
+    "# Remap filename for elastic tasks as they were joint-learned\n",
+    "results_locs[\"matbench_log_gvrh\"] = results_locs[\"matbench_log_kvrh\"] = f\"{fname}/matbench_elastic/results/matbench_elastic_results.pkl\"\n",
+    "target_key_map = {\n",
+    "    \"matbench_log_gvrh\": \"log10G_VRH\",\n",
+    "    \"matbench_log_kvrh\": \"log10K_VRH\",\n",
+    "}\n"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "2021-10-11 16:16:24 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: \n",
+      "['matbench_dielectric',\n",
+      " 'matbench_jdft2d',\n",
+      " 'matbench_steels',\n",
+      " 'matbench_expt_gap',\n",
+      " 'matbench_phonons',\n",
+      " 'matbench_log_gvrh',\n",
+      " 'matbench_log_kvrh',\n",
+      " 'matbench_glass',\n",
+      " 'matbench_expt_is_metal',\n",
+      " 'matbench_perovskites',\n",
+      " 'matbench_mp_gap',\n",
+      " 'matbench_mp_is_metal',\n",
+      " 'matbench_mp_e_form']\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "source": [
+    "for task in mb.tasks:\n",
+    "    task.load()\n",
+    "    with open(results_locs[task.dataset_name], \"rb\") as f:\n",
+    "        results = pickle.load(f)\n",
+    "        \n",
+    "    for fold_ind, fold in enumerate(task.folds):\n",
+    "\n",
+    "        # Handle predictions that were made with joint/multitarget learning\n",
+    "        if task.dataset_name in target_key_map:\n",
+    "            predictions = results[\"predictions\"][fold_ind][target_key_map[task.dataset_name]].values\n",
+    "            stds = results[\"stds\"][fold_ind][target_key_map[task.dataset_name]].values\n",
+    "        else:\n",
+    "            predictions = results[\"predictions\"][fold_ind].values\n",
+    "            stds = results[\"stds\"][fold_ind].values\n",
+    "        \n",
+    "        # Classification tasks must be recorded with labels and not group probabilities\n",
+    "        if task.metadata.task_type == CLF_KEY:\n",
+    "            predictions = predictions[:, 1]\n",
+    "\n",
+    "        predictions = predictions.flatten()\n",
+    "\n",
+    "        task.record(\n",
+    "            fold, \n",
+    "            predictions,\n",
+    "            params={\"std\": stds},\n",
+    "        )\n",
+    "            \n",
+    "        \n",
+    "    if task.metadata.task_type == CLF_KEY:\n",
+    "        print(f\"{task.dataset_name}: Accuracy score {task.scores['accuracy']['mean']}\")\n",
+    "        print(f\"{task.dataset_name}: ROC score {task.scores['rocauc']['mean']}\")\n",
+    "    else:\n",
+    "        print(f\"{task.dataset_name}: MAE {task.scores['mae']['mean']}\")\n",
+    "\n",
+    "    task.df = None"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "2021-10-11 16:16:24 INFO     Loading dataset 'matbench_dielectric'...\n",
+      "2021-10-11 16:16:29 INFO     Dataset 'matbench_dielectric loaded.\n",
+      "2021-10-11 16:16:29 INFO     Recorded fold matbench_dielectric-0 successfully.\n",
+      "2021-10-11 16:16:29 INFO     Recorded fold matbench_dielectric-1 successfully.\n",
+      "2021-10-11 16:16:29 INFO     Recorded fold matbench_dielectric-2 successfully.\n",
+      "2021-10-11 16:16:29 INFO     Recorded fold matbench_dielectric-3 successfully.\n",
+      "2021-10-11 16:16:29 INFO     Recorded fold matbench_dielectric-4 successfully.\n",
+      "matbench_dielectric: MAE 0.2969698688737498\n",
+      "2021-10-11 16:16:29 INFO     Loading dataset 'matbench_jdft2d'...\n",
+      "2021-10-11 16:16:30 INFO     Dataset 'matbench_jdft2d loaded.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_jdft2d-0 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_jdft2d-1 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_jdft2d-2 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_jdft2d-3 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_jdft2d-4 successfully.\n",
+      "matbench_jdft2d: MAE 34.53678641963336\n",
+      "2021-10-11 16:16:30 INFO     Loading dataset 'matbench_steels'...\n",
+      "2021-10-11 16:16:30 INFO     Dataset 'matbench_steels loaded.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_steels-0 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_steels-1 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_steels-2 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_steels-3 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_steels-4 successfully.\n",
+      "matbench_steels: MAE 96.21387590993324\n",
+      "2021-10-11 16:16:30 INFO     Loading dataset 'matbench_expt_gap'...\n",
+      "2021-10-11 16:16:30 INFO     Dataset 'matbench_expt_gap loaded.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_expt_gap-0 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_expt_gap-1 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_expt_gap-2 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_expt_gap-3 successfully.\n",
+      "2021-10-11 16:16:30 INFO     Recorded fold matbench_expt_gap-4 successfully.\n",
+      "matbench_expt_gap: MAE 0.3470153653294551\n",
+      "2021-10-11 16:16:30 INFO     Loading dataset 'matbench_phonons'...\n",
+      "2021-10-11 16:16:31 INFO     Dataset 'matbench_phonons loaded.\n",
+      "2021-10-11 16:16:31 INFO     Recorded fold matbench_phonons-0 successfully.\n",
+      "2021-10-11 16:16:31 INFO     Recorded fold matbench_phonons-1 successfully.\n",
+      "2021-10-11 16:16:31 INFO     Recorded fold matbench_phonons-2 successfully.\n",
+      "2021-10-11 16:16:31 INFO     Recorded fold matbench_phonons-3 successfully.\n",
+      "2021-10-11 16:16:31 INFO     Recorded fold matbench_phonons-4 successfully.\n",
+      "matbench_phonons: MAE 38.7524344203875\n",
+      "2021-10-11 16:16:31 INFO     Loading dataset 'matbench_log_gvrh'...\n",
+      "2021-10-11 16:16:37 INFO     Dataset 'matbench_log_gvrh loaded.\n",
+      "2021-10-11 16:16:37 INFO     Recorded fold matbench_log_gvrh-0 successfully.\n",
+      "2021-10-11 16:16:37 INFO     Recorded fold matbench_log_gvrh-1 successfully.\n",
+      "2021-10-11 16:16:37 INFO     Recorded fold matbench_log_gvrh-2 successfully.\n",
+      "2021-10-11 16:16:37 INFO     Recorded fold matbench_log_gvrh-3 successfully.\n",
+      "2021-10-11 16:16:37 INFO     Recorded fold matbench_log_gvrh-4 successfully.\n",
+      "matbench_log_gvrh: MAE 0.07311620406947483\n",
+      "2021-10-11 16:16:37 INFO     Loading dataset 'matbench_log_kvrh'...\n",
+      "2021-10-11 16:16:45 INFO     Dataset 'matbench_log_kvrh loaded.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_log_kvrh-0 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_log_kvrh-1 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_log_kvrh-2 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_log_kvrh-3 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_log_kvrh-4 successfully.\n",
+      "matbench_log_kvrh: MAE 0.05477001646276852\n",
+      "2021-10-11 16:16:45 INFO     Loading dataset 'matbench_glass'...\n",
+      "2021-10-11 16:16:45 INFO     Dataset 'matbench_glass loaded.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_glass-0 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_glass-1 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_glass-2 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_glass-3 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_glass-4 successfully.\n",
+      "matbench_glass: Accuracy score 0.8676056338028169\n",
+      "matbench_glass: ROC score 0.8106763388737604\n",
+      "2021-10-11 16:16:45 INFO     Loading dataset 'matbench_expt_is_metal'...\n",
+      "2021-10-11 16:16:45 INFO     Dataset 'matbench_expt_is_metal loaded.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_expt_is_metal-0 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_expt_is_metal-1 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_expt_is_metal-2 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_expt_is_metal-3 successfully.\n",
+      "2021-10-11 16:16:45 INFO     Recorded fold matbench_expt_is_metal-4 successfully.\n",
+      "matbench_expt_is_metal: Accuracy score 0.9160717675704676\n",
+      "matbench_expt_is_metal: ROC score 0.9160515032798082\n",
+      "2021-10-11 16:16:45 INFO     Loading dataset 'matbench_perovskites'...\n",
+      "2021-10-11 16:16:51 INFO     Dataset 'matbench_perovskites loaded.\n",
+      "2021-10-11 16:16:51 INFO     Recorded fold matbench_perovskites-0 successfully.\n",
+      "2021-10-11 16:16:51 INFO     Recorded fold matbench_perovskites-1 successfully.\n",
+      "2021-10-11 16:16:51 INFO     Recorded fold matbench_perovskites-2 successfully.\n",
+      "2021-10-11 16:16:51 INFO     Recorded fold matbench_perovskites-3 successfully.\n",
+      "2021-10-11 16:16:51 INFO     Recorded fold matbench_perovskites-4 successfully.\n",
+      "matbench_perovskites: MAE 0.09075423473752561\n",
+      "2021-10-11 16:16:51 INFO     Loading dataset 'matbench_mp_gap'...\n",
+      "2021-10-11 16:19:44 INFO     Dataset 'matbench_mp_gap loaded.\n",
+      "2021-10-11 16:19:44 INFO     Recorded fold matbench_mp_gap-0 successfully.\n",
+      "2021-10-11 16:19:44 INFO     Recorded fold matbench_mp_gap-1 successfully.\n",
+      "2021-10-11 16:19:44 INFO     Recorded fold matbench_mp_gap-2 successfully.\n",
+      "2021-10-11 16:19:44 INFO     Recorded fold matbench_mp_gap-3 successfully.\n",
+      "2021-10-11 16:19:44 INFO     Recorded fold matbench_mp_gap-4 successfully.\n",
+      "matbench_mp_gap: MAE 0.21987236694632012\n",
+      "2021-10-11 16:19:46 INFO     Loading dataset 'matbench_mp_is_metal'...\n",
+      "2021-10-11 16:21:47 INFO     Dataset 'matbench_mp_is_metal loaded.\n",
+      "2021-10-11 16:21:47 INFO     Recorded fold matbench_mp_is_metal-0 successfully.\n",
+      "2021-10-11 16:21:47 INFO     Recorded fold matbench_mp_is_metal-1 successfully.\n",
+      "2021-10-11 16:21:47 INFO     Recorded fold matbench_mp_is_metal-2 successfully.\n",
+      "2021-10-11 16:21:47 INFO     Recorded fold matbench_mp_is_metal-3 successfully.\n",
+      "2021-10-11 16:21:47 INFO     Recorded fold matbench_mp_is_metal-4 successfully.\n",
+      "matbench_mp_is_metal: Accuracy score 0.8030506180286311\n",
+      "matbench_mp_is_metal: ROC score 0.7804643191398983\n",
+      "2021-10-11 16:21:49 INFO     Loading dataset 'matbench_mp_e_form'...\n",
+      "2021-10-11 16:24:21 INFO     Dataset 'matbench_mp_e_form loaded.\n",
+      "2021-10-11 16:24:21 INFO     Recorded fold matbench_mp_e_form-0 successfully.\n",
+      "2021-10-11 16:24:21 INFO     Recorded fold matbench_mp_e_form-1 successfully.\n",
+      "2021-10-11 16:24:21 INFO     Recorded fold matbench_mp_e_form-2 successfully.\n",
+      "2021-10-11 16:24:21 INFO     Recorded fold matbench_mp_e_form-3 successfully.\n",
+      "2021-10-11 16:24:21 INFO     Recorded fold matbench_mp_e_form-4 successfully.\n",
+      "matbench_mp_e_form: MAE 0.044769163811452004\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "source": [
+    "mb.to_file(\"results.json.gz\")"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "2021-10-11 16:24:27 INFO     Successfully wrote MatbenchBenchmark to file 'results.json.gz'.\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.8.11 64-bit ('modnet_matbench': conda)"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.11"
+  },
+  "interpreter": {
+   "hash": "ab74d777aa78e6796984bc572ffbc8ac7917152da5feb624ad45fa04e0a32b7a"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/benchmarks/matbench_v0.1_modnet_v0.1.10/info.json b/benchmarks/matbench_v0.1_modnet_v0.1.10/info.json
@@ -0,0 +1,8 @@
+{
+  "authors": "Pierre-Paul De Breuck (@ppdebreuck), Matthew Evans (@ml-evs)",
+  "algorithm": "MODNet (v0.1.10)",
+  "algorithm_long": "MODNet, the Materials Optimal Descriptor Network (v0.1.10). A feed-forward neural network, using all compatible matminer features and a relevance-redundancy based feature selection algorithm. Hyperparameter optimisation is performed with a nested grid search. Benchmark results were loaded from https://github.com/ml-evs/modnet-matbench.",
+  "bibtex_refs": "@article{De_Breuck_2021, doi = {10.1088/1361-648x/ac1280}, url = {https://doi.org/10.1088/1361-648x/ac1280}, year = 2021, month = {jul}, publisher = {{IOP} Publishing}, volume = {33}, number = {40}, pages = {404002}, author = {Pierre-Paul De Breuck and Matthew L Evans and Gian-Marco Rignanese}, title = {Robust model benchmarking and bias-imbalance in data-driven materials science: a case study on {MODNet}}, journal = {Journal of Physics: Condensed Matter}, abstract = {As the number of novel data-driven approaches to material science continues to grow, it is crucial to perform consistent quality, reliability and applicability assessments of model performance. In this paper, we benchmark the Materials Optimal Descriptor Network (MODNet) method and architecture against the recently released MatBench v0.1, a curated test suite of materials datasets. MODNet is shown to outperform current leaders on 6 of the 13 tasks, while closely matching the current leaders on a further 2 tasks; MODNet performs particularly well when the number of samples is below 10 000. Attention is paid to two topics of concern when benchmarking models. First, we encourage the reporting of a more diverse set of metrics as it leads to a more comprehensive and holistic comparison of model performance. Second, an equally important task is the uncertainty assessment of a model towards a target domain. Significant variations in validation errors can be observed, depending on the imbalance and bias in the training set (i.e., similarity between training and application space). By using an ensemble MODNet model, confidence intervals can be built and the uncertainty on individual predictions can be quantified. Imbalance and bias issues are often overlooked, and yet are important for successful real-world applications of machine learning in materials science and condensed matter.}}, @article{DeBreuck2021, doi = {10.1038/s41524-021-00552-2}, url = {https://doi.org/10.1038/s41524-021-00552-2}, year = {2021}, month = jun, publisher = {Springer Science and Business Media {LLC}}, volume = {7}, number = {1}, author = {Pierre-Paul De Breuck and Geoffroy Hautier and Gian-Marco Rignanese}, title = {Materials property prediction for limited datasets enabled by feature selection and joint learning with {MODNet}}, journal = {npj Computational Materials}}",
+  "notes": null,
+  "requirements": {"python":  ["modnet==0.1.10", "matbench==0.2.0"]}
+}
diff --git a/benchmarks/matbench_v0.1_modnet_v0.1.10/results.json.gz b/benchmarks/matbench_v0.1_modnet_v0.1.10/results.json.gz