{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import packages and functions" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import sys\n", "# force the notebook to look for files in the upper level directory\n", "sys.path.insert(1, '../')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from glob import glob\n", "import pymatgen as mg\n", "from data.compound_featurizer import read_new_struct, \\\n", " get_struct, get_elem_info, get_elem_distances, \\\n", " calc_mm_dists, calc_mx_dists, calc_xx_dists, calc_elem_max_potential" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read in the initial dataframe" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# initialize an empty list of dataframes\n", "df_lst = []\n", "# iterate over all the cif files\n", "for struct_file_path in glob(\"./user_defined_structures/featurizer_sub_function_demo/*.cif\"):\n", " # add the newly read in dataframe to the list\n", " df_lst.append(read_new_struct(struct_file_path))\n", "# concatenate all the dataframes in the list\n", "df = pd.concat(df_lst, ignore_index=True)\n", "# assign oxidation states to BaTiO3 and Mg2AlFeO5\n", "df.at[df[df.Compound == \"BaTiO3\"].index[0], \"structure\"].add_oxidation_state_by_element({\"Ba\": 2, \"Ti\": 4, \"O\": -2})\n", "df.at[df[df.Compound == \"Mg2AlFeO5\"].index[0], \"structure\"].add_oxidation_state_by_element({\"Mg\": 2, \"Al\": 3, \"Fe\": 3, \"O\": -2})" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Compoundstructure
0Mg2AlFeO5[[0.1798251 1.58702 2.644008 ] Mg2+, [ 6.407...
1La2.8Mg1.2Mn4O12[[0.12084071 1.929845 5.45406422] La:0.700, ...
2BaTiO3[[0.00849286 0.00844357 0.00854272] Ba2+, [2.1...
\n", "
" ], "text/plain": [ " Compound structure\n", "0 Mg2AlFeO5 [[0.1798251 1.58702 2.644008 ] Mg2+, [ 6.407...\n", "1 La2.8Mg1.2Mn4O12 [[0.12084071 1.929845 5.45406422] La:0.700, ...\n", "2 BaTiO3 [[0.00849286 0.00844357 0.00854272] Ba2+, [2.1..." ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# here is a print out of the dataframe\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Demo usage of relevant sub-functions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. get_struct(\"compound_formula\", input_df) -> Pymatgen Structure\n", "Since we've already read in all the structures in dataframe, we can access the individual Pymatgen structure using the compound formula. \n", "\n", "_Tip_: when you have questions about a specific function, you can always go to the original .py file or you can press ⇧ Shift + ⇥ Tab for its docstring" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Structure Summary\n", "Lattice\n", " abc : 4.081131019999999 4.08113102 4.08113102\n", " angles : 89.66458222000001 89.66458222000001 89.66458222000001\n", " volume : 67.97032918428083\n", " A : 4.081061087960032 0.0 0.02389139478379035\n", " B : 0.023751938902471077 4.080991968757093 0.02389139478379035\n", " C : 0.0 0.0 4.08113102\n", "PeriodicSite: Ba2+ (0.0085, 0.0084, 0.0085) [0.0021, 0.0021, 0.0021]\n", "PeriodicSite: Ti4+ (2.1199, 2.1076, 2.1323) [0.5164, 0.5164, 0.5164]\n", "PeriodicSite: O2- (1.9893, 1.9777, 3.9972) [0.4846, 0.4846, 0.9738]\n", "PeriodicSite: O2- (2.0009, 3.9739, 2.0126) [0.4846, 0.9738, 0.4846]\n", "PeriodicSite: O2- (3.9855, 1.9777, 2.0126) [0.9738, 0.4846, 0.4846]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_struct = get_struct(\"BaTiO3\", df)\n", "test_struct" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "If you happen to type in a formula that doesn't have an exact match, the function will return an error message along with several possible suggestions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "Exception", "evalue": "The structure does not exist in this dataframe. The closest matches are ['BaTiO3'].", "output_type": "error", "traceback": [ "\u001b[0;31m--------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0mTraceback (most recent call last)", "\u001b[0;32m~/PycharmProjects/mit_model_code/data/compound_featurizer.py\u001b[0m in \u001b[0;36mget_struct\u001b[0;34m(compound_formula, df_input, struct_type)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 62\u001b[0;31m \u001b[0mstruct_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf_input\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf_input\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCompound\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mcompound_formula\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstruct_type\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 63\u001b[0m \u001b[0;31m# if the formula has no exact in the input dataframe,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mIndexError\u001b[0m: index 0 is out of bounds for axis 0 with size 0", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mException\u001b[0mTraceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mget_struct\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"BaTiO\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/PycharmProjects/mit_model_code/data/compound_featurizer.py\u001b[0m in \u001b[0;36mget_struct\u001b[0;34m(compound_formula, df_input, struct_type)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mIndexError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 66\u001b[0m raise Exception(\"The structure does not exist in this dataframe. The closest matches are {}.\".\n\u001b[0;32m---> 67\u001b[0;31m format(difflib.get_close_matches(compound_formula, df_input.Compound)))\n\u001b[0m\u001b[1;32m 68\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstruct_output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mException\u001b[0m: The structure does not exist in this dataframe. The closest matches are ['BaTiO3']." ] } ], "source": [ "get_struct(\"BaTiO\", df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "_BaTiO3_ will be used consistently as the demo test structure from now on." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 
get_elem_distances(Pymatgen_Structure, Pymatgen_Element_1, Pymatgen_Element_2) -> Array of distances (Å)\n", "\n", "Now that we have the structure, we can use **get_elem_distances()** to calculate the distance between any two elements in the structure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "But before doing that, we first need to know which site(s) each element occupies through the **get_elem_info()** function" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{Element Ba: [0, 1], Element Ti: [2, 3], Element O: [4, 5, 6, 7, 8, 9]} \n", "\n", "Full Formula (Ba2 Ti2 O6)\n", "Reduced Formula: BaTiO3\n", "abc : 8.162262 4.081131 4.081131\n", "angles: 89.664582 89.664582 89.664582\n", "Sites (10)\n", " # SP a b c\n", "--- ---- -------- -------- --------\n", " 0 Ba2+ 0.001035 0.002069 0.002069\n", " 1 Ba2+ 0.501035 0.002069 0.002069\n", " 2 Ti4+ 0.25822 0.51644 0.51644\n", " 3 Ti4+ 0.75822 0.51644 0.51644\n", " 4 O2- 0.24231 0.484619 0.973753\n", " 5 O2- 0.742309 0.484619 0.973753\n", " 6 O2- 0.24231 0.973753 0.484619\n", " 7 O2- 0.742309 0.973753 0.484619\n", " 8 O2- 0.486876 0.484619 0.484619\n", " 9 O2- 0.986876 0.484619 0.484619\n" ] } ], "source": [ "elem_indices, _, modified_struct = get_elem_info(test_struct)\n", "print(elem_indices, \"\\n\")\n", "print(modified_struct)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you compare this to the printout of the original structure below, you will find that the modified structure has twice as many sites" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Full Formula (Ba1 Ti1 O3)\n", "Reduced Formula: BaTiO3\n", "abc : 4.081131 4.081131 4.081131\n", "angles: 89.664582 89.664582 89.664582\n", "Sites (5)\n", " # SP a b c\n", "--- ---- -------- -------- --------\n", " 0 Ba2+ 0.002069 0.002069 0.002069\n", " 1 Ti4+ 
0.51644 0.51644\n", " 2 O2- 0.484619 0.484619 0.973753\n", " 3 O2- 0.484619 0.973753 0.484619\n", " 4 O2- 0.973753 0.484619 0.484619\n" ] } ], "source": [ "print(test_struct)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is because if we keep the original structure, _Ba_ and _Ti_ will each occupy only one site" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Ba: [0], Element Ti: [1], Element O: [2, 3, 4]}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "elem_indices_orig, *_ = get_elem_info(test_struct, makesupercell=False)\n", "elem_indices_orig" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The reason for returning a supercell of the original structure is related to the inner workings of the **get_elem_distances()** function. It basically works by getting the site indices of the two elements (they can be the same) and using the built-in method **pymatgen.Structure.get_distance(i, j)** to calculate the distance between site i and site j. There is one scenario where only using the original structure can cause a problem:\n", "\n", "1. If we have a structure where an element only occupies one site and we want to know the distance between the same elements, e.g. _Ba_-_Ba_ or _Ti_-_Ti_ in _BaTiO3_, we would have **pymatgen.Structure.get_distance(i, j)** where i = j and we would only get 0 for that distance.\n", "\n", "By making a supercell (in this case a'=2a, b'=b, c'=c), we would be able to get a non-zero distance between the original site and the newly translated site along the a-axis. 
That being said, if every element in the original structure already occupies more than one site, the structure will not be modified.\n", "\n", "Let's first try to calculate the _Ba_-_Ba_ distance using the supercell structure (the site indices in `elem_indices` refer to the supercell, so they must be used together with `modified_struct`)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "array([4.08113102])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances(modified_struct,\n", " elem_1=mg.Element(\"Ba\"),\n", " elem_indices=elem_indices, only_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Note**: when the `only_unique` parameter is set to be `True`, the function will only return the unique values of distance since in a structure the same distance can occur multiple times due to symmetry.\n", "\n", "Let's see what happens when we use the original reduced structure" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances(test_struct,\n", " elem_1=mg.Element(\"Ba\"),\n", " elem_indices=elem_indices_orig, only_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As expected, we get 0 Å. We can also calculate the distance between different elements. 
Let's see the distance between _Ti_ and _O_" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1.87390777, 1.87390777, 1.87390777])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances(test_struct,\n", " elem_1=mg.Element(\"O\"), elem_2=mg.Element(\"Ti\"),\n", " elem_indices=elem_indices_orig, only_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This function can also handle structures where multiple elements can occupy the same site (La$_{2.8}$Mg$_{1.2}$Mn$_4$O$_{12}$ is a made-up structure generated for the purpose of this demo)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Full Formula (La2.8 Mg1.2 Mn4 O12)\n", "Reduced Formula: La2.8Mg1.2Mn4O12\n", "abc : 6.462070 7.719380 5.480370\n", "angles: 90.000000 90.000000 90.000000\n", "Sites (20)\n", " # SP a b c\n", "--- ------------------ ------ ------ ------\n", " 0 La:0.700, Mg:0.300 0.0187 0.25 0.9952\n", " 1 La:0.700, Mg:0.300 0.9813 0.75 0.0048\n", " 2 La:0.700, Mg:0.300 0.4813 0.75 0.4952\n", " 3 La:0.700, Mg:0.300 0.5187 0.25 0.5048\n", " 4 Mn 0 0 0.5\n", " 5 Mn 0.5 0 0\n", " 6 Mn 0 0.5 0.5\n", " 7 Mn 0.5 0.5 0\n", " 8 O 0.491 0.25 0.0629\n", " 9 O 0.509 0.75 0.9371\n", " 10 O 0.009 0.75 0.5629\n", " 11 O 0.991 0.25 0.4371\n", " 12 O 0.2742 0.0334 0.7249\n", " 13 O 0.7258 0.9666 0.2751\n", " 14 O 0.2258 0.9666 0.2249\n", " 15 O 0.7742 0.0334 0.7751\n", " 16 O 0.7258 0.5334 0.2751\n", " 17 O 0.2742 0.4666 0.7249\n", " 18 O 0.7742 0.4666 0.7751\n", " 19 O 0.2258 0.5334 0.2249\n" ] } ], "source": [ "special_struct = get_struct(\"La2.8Mg1.2Mn4O12\", df)\n", "print(special_struct)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "elem_indices, *_ = get_elem_info(special_struct)" ] }, { "cell_type": "code", "execution_count": 15, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([3.33227319, 3.33227319, 3.33227319, 3.66036914, 3.66036914,\n", " 3.66036914])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distances = get_elem_distances(special_struct,\n", " elem_1=mg.Element(\"La\"), elem_2=mg.Element(\"Mn\"),\n", " elem_indices=elem_indices, only_unique=True)\n", "distances" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It may seem that there are some distances that are equal to each other, but since the values displayed do not have all the decimal places shown, there are still slight differences among them." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-4.440892098500626e-16" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distances[0] - distances[1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Wrapper functions around get_elem_distances() to calculate distances between different types of elements" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.1 calc_mm_dists() to calculate distances between metal-metal elements" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "{'Ti-Ti': array([4.08113102]),\n", " 'Ti-Ba': array([3.45281586, 3.45281586, 3.49445999]),\n", " 'Ba-Ba': array([4.08113102])}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_mm_dists(test_struct, return_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.2 calc_mx_dists() to calculate distances between metal-non_metal elements" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": 
"#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "{'Ti-O': array([1.87390777, 1.87390777, 1.87390777, 1.87390777, 1.87390777,\n", " 2.22393768, 2.22393768]),\n", " 'Ba-O': array([2.79465755, 2.79465755, 2.88146024, 2.88146024, 2.88146024])}" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_mx_dists(test_struct, return_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.3 calc_xx_dists() to calculate distances between non_metal-non_metal elements" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "{'O-O': array([2.81480587, 2.81480587, 2.81480587, 2.81480587, 2.89490539,\n", " 2.89490539, 2.89490539, 2.89490539])}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_xx_dists(test_struct, return_unique=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This functionality is realized again through the **get_elem_info()** function where all the elements in the structure is classified as either a metal or a non_metal." ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'non_metals': [Element O],\n", " 'all_metals': [Element Ti, Element Ba],\n", " 'most_electro_neg_metal': Element Ti,\n", " 'other_metals': [Element Ba]}" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_, elem_groups, _ = get_elem_info(test_struct)\n", "elem_groups" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Once we know which elements are metal and which ones are non_metal, we can then use the elem_indices to find where they are (i.e. the site indices) and compute the distances using the generic element distance finder **get_elem_distances()**." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. calc_elem_max_potential() to calculate Madelung Site Potentials" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The **calc_elem_max_potential()** function uses the `EwaldSummation` class from Pymatgen to calculate the site energy for every site in a structure and converts each site energy to a site potential using the relations below. ($U_{E_\\text{tot}}$: the total potential energy of the structure, $U_{E_i}$: the site energy at site i, $N$: the total number of sites, $q_i$: the charge at site i, $\\Phi(r_i)$: the site potential at site i)\n", "\n", "$$\n", "\\begin{align*}\n", " U_{E_\\text{tot}}&=\\sum_{i=1}^{N}U_{E_i}=\\frac{1}{2}\\sum_{i=1}^{N}q_i\\Phi(r_i)\\\\\n", " U_{E_i}&=\\frac{1}{2}q_i\\Phi(r_i)\\\\\n", " \\Phi(r_i)&=\\frac{2U_{E_i}}{q_i}\n", "\\end{align*}\n", "$$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The default output unit for the Madelung site potential is $V$" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Ba: [-19.007087770378423],\n", " Element Ti: [-44.09209640742854],\n", " Element O: [23.05600190304703, 23.056001903047033, 23.05600190304704]}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_elem_max_potential(test_struct, full_list=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "But the unit can be converted from $V$ to $e/Å$ by passing `check_vesta=True`, for easier comparison with the results from VESTA" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Ba: [-1.3199687332941026],\n", " Element Ti: [-3.0620255636372096],\n", " Element O: [1.6011501601113243, 1.6011501601113245, 1.601150160111325]}" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_elem_max_potential(test_struct, full_list=True, check_vesta=True)" ] }, 
{ "cell_type": "markdown", "metadata": {}, "source": [ "If we don't specify the `full_list` parameter, it will be set to `False` and the function only return the maximum site potential for each element. " ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Ba: -19.007087770378423,\n", " Element Ti: -44.09209640742854,\n", " Element O: 23.05600190304704}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_elem_max_potential(test_struct)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Just like before, this function can also work with structures where multiple elements occupy the same site. We can try a compound with non-integer stoichiometry this time. (again, Mg$_2$AlFeO$_5$ is a made-up structure)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Full Formula (Mg8 Al4 Fe4 O20)\n", "Reduced Formula: Mg2AlFeO5\n", "abc : 6.587000 14.600000 5.374000\n", "angles: 90.000000 90.000000 90.000000\n", "Sites (36)\n", " # SP a b c\n", "--- ---------------------- ------ ------ ------\n", " 0 Mg2+ 0.0273 0.1087 0.492\n", " 1 Mg2+ 0.9727 0.8913 0.492\n", " 2 Mg2+ 0.9727 0.6087 0.492\n", " 3 Mg2+ 0.0273 0.3913 0.492\n", " 4 Mg2+ 0.5273 0.6087 0.992\n", " 5 Mg2+ 0.4727 0.3913 0.992\n", " 6 Mg2+ 0.4727 0.1087 0.992\n", " 7 Mg2+ 0.5273 0.8913 0.992\n", " 8 Al3+:0.760, Fe3+:0.240 0.9283 0.25 0.9533\n", " 9 Al3+:0.760, Fe3+:0.240 0.0717 0.75 0.9533\n", " 10 Al3+:0.760, Fe3+:0.240 0.4283 0.75 0.4533\n", " 11 Al3+:0.760, Fe3+:0.240 0.5717 0.25 0.4533\n", " 12 Al3+:0.240, Fe3+:0.760 0 0 0\n", " 13 Al3+:0.240, Fe3+:0.760 0 0.5 0\n", " 14 Al3+:0.240, Fe3+:0.760 0.5 0.5 0.5\n", " 15 Al3+:0.240, Fe3+:0.760 0.5 0 0.5\n", " 16 O2- 0.2523 0.9861 0.2491\n", " 17 O2- 0.7477 0.0139 0.2491\n", " 18 O2- 0.7477 0.4861 0.2491\n", " 19 O2- 0.2523 0.5139 0.2491\n", " 20 O2- 0.7523 0.4861 
0.7491\n", " 21 O2- 0.2477 0.5139 0.7491\n", " 22 O2- 0.2477 0.9861 0.7491\n", " 23 O2- 0.7523 0.0139 0.7491\n", " 24 O2- 0.068 0.1493 0.0246\n", " 25 O2- 0.932 0.8507 0.0246\n", " 26 O2- 0.932 0.6493 0.0246\n", " 27 O2- 0.068 0.3507 0.0246\n", " 28 O2- 0.568 0.6493 0.5246\n", " 29 O2- 0.432 0.3507 0.5246\n", " 30 O2- 0.432 0.1493 0.5246\n", " 31 O2- 0.568 0.8507 0.5246\n", " 32 O2- 0.8607 0.25 0.6193\n", " 33 O2- 0.1393 0.75 0.6193\n", " 34 O2- 0.3607 0.75 0.1193\n", " 35 O2- 0.6393 0.25 0.1193\n" ] } ], "source": [ "non_stoich_struct = get_struct(\"Mg2AlFeO5\", df)\n", "print(non_stoich_struct)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Mg: -1.3304538713264666,\n", " Element Fe: -2.264442240109135,\n", " Element Al: -2.264442240109135,\n", " Element O: 1.6742872520162175}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_elem_max_potential(non_stoich_struct, check_vesta=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now it's your turn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to test the functions with structures that are not in the loaded dataframe, you can also upload your own .cif file to the `user_defined_structures` folder located at this path \n", "\n", "_./user_defined_structures/_" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "USER_DEFINED_FOLDER_PATH = \"./user_defined_structures/\"" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "example_new_struct = mg.Structure.from_file(USER_DEFINED_FOLDER_PATH + \"CuNiO2_mp-1178372_primitive.cif\")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure Summary\n", "Lattice\n", " abc : 3.09746735 3.09746735 5.69675328\n", " angles : 64.90585729 115.09414271000001 124.91178089\n", " volume : 
39.36031605994916\n", " A : 2.8051040966198997 0.0 -1.3136571057328008\n", " B : -1.3422903888963815 2.463100790619447 1.3136571057328006\n", " C : 0.0 0.0 5.69675328\n", "PeriodicSite: Cu (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]\n", "PeriodicSite: Ni (0.0000, 0.0000, 2.8484) [0.0000, 0.0000, 0.5000]\n", "PeriodicSite: O (1.0256, 1.0569, 1.3444) [0.5709, 0.4291, 0.2687]\n", "PeriodicSite: O (0.4373, 1.4062, 4.3524) [0.4291, 0.5709, 0.7313]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_new_struct" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define a wrapper function around get_elem_distances()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "def get_elem_distances_wrapper(structure: mg.Structure, **kwargs):\n", "    \"\"\"Call get_elem_distances() without having to build elem_indices manually.\n", "\n", "    The input structure is passed through get_elem_info(); the site indices and the\n", "    structure it returns are then handed to get_elem_distances() together.\n", "\n", "    :param structure: the Pymatgen structure to measure distances in\n", "    :param kwargs: forwarded to get_elem_distances(), e.g. elem_1 and elem_2;\n", "        only_unique defaults to True here but can be overridden by the caller\n", "    :return: whatever get_elem_distances() returns for the given arguments\n", "    \"\"\"\n", "    # the indices refer to the structure returned by get_elem_info(), so use that one\n", "    elem_indices, _, indexed_struct = get_elem_info(structure)\n", "    # setdefault() keeps the default without clashing with a caller-supplied only_unique\n", "    kwargs.setdefault(\"only_unique\", True)\n", "\n", "    return get_elem_distances(indexed_struct, elem_indices=elem_indices, **kwargs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the _Cu_-_Ni_ distance" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.84837664, 3.19749481])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances_wrapper(example_new_struct, elem_1=mg.Element(\"Cu\"), elem_2=mg.Element(\"Ni\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the _Ni_-_O_ distance" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.07845014, 2.07845014, 2.10492626, 2.10492626])" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances_wrapper(example_new_struct, elem_1=mg.Element(\"O\"), 
elem_2=mg.Element(\"Ni\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Check the _Cu_-_Cu_ distance" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([2.864732])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_elem_distances_wrapper(example_new_struct, elem_1=mg.Element(\"Cu\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get distances of all three types of element pairs" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Ni-Ni': array([2.864732]),\n", " 'Ni-Cu': array([2.84837664, 3.19749481, 3.19749481, 2.84837664]),\n", " 'Cu-Cu': array([2.864732])}" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_mm_dists(example_new_struct)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'Ni-O': array([2.10492626, 2.07845014, 2.07845014, 2.10492626, 2.07845014,\n", " 2.10492626, 2.10492626, 2.07845014]),\n", " 'Cu-O': array([1.99401095, 1.99401095, 1.99401095, 1.99401095, 1.99401095,\n", " 1.99401095, 1.99401095, 1.99401095])}" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_mx_dists(example_new_struct)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'O-O': array([2.864732 , 2.77446017, 2.81194507, 2.81194507, 2.77446017,\n", " 2.864732 ])}" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_xx_dists(example_new_struct)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## A note for site potential calculation\n", "To use the EwaldSummation technique, the input structure has to have oxidation states (that's where the charge value comes from) associated with all the sites. 
A structure without oxidation states will raise an error in the function." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "Element has no attribute oxi_state!", "output_type": "error", "traceback": [ "\u001b[0;31m--------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0mTraceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mcalc_elem_max_potential\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexample_new_struct\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/PycharmProjects/mit_model_code/data/compound_featurizer.py\u001b[0m in \u001b[0;36mcalc_elem_max_potential\u001b[0;34m(structure_oxid, full_list, check_vesta)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0;31m# define a dictionary that stores the oxidation states for all the element\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 591\u001b[0;31m \u001b[0melem_charge_lookup\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mspecie\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melement\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mspecie\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moxi_state\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspecie\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstructure_oxid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomposition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melements\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 592\u001b[0m \u001b[0;31m# if there is only one element, then ewald summation will not work\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem_charge_lookup\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/PycharmProjects/mit_model_code/data/compound_featurizer.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 589\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 590\u001b[0m \u001b[0;31m# define a dictionary that stores the oxidation states for all the element\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 591\u001b[0;31m \u001b[0melem_charge_lookup\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mspecie\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melement\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mspecie\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moxi_state\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mspecie\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mstructure_oxid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcomposition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0melements\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 592\u001b[0m \u001b[0;31m# if there is only one element, then ewald summation will not work\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 593\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0melem_charge_lookup\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/PycharmProjects/mit_model_code/venv/lib/python3.7/site-packages/pymatgen/core/periodic_table.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 508\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 509\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 510\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Element has no attribute %s!\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 511\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 512\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mAttributeError\u001b[0m: Element has no attribute oxi_state!" ] } ], "source": [ "calc_elem_max_potential(example_new_struct)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To overcome this problem, we can add oxidation states to the structure using the add_oxidation_state_by_guess() method from Pymatgen" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Structure Summary\n", "Lattice\n", " abc : 3.09746735 3.09746735 5.69675328\n", " angles : 64.90585729 115.09414271000001 124.91178089\n", " volume : 39.36031605994916\n", " A : 2.8051040966198997 0.0 -1.3136571057328008\n", " B : -1.3422903888963815 2.463100790619447 1.3136571057328006\n", " C : 0.0 0.0 5.69675328\n", "PeriodicSite: Cu2+ (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]\n", "PeriodicSite: Ni2+ (0.0000, 0.0000, 2.8484) [0.0000, 0.0000, 0.5000]\n", "PeriodicSite: O2- (1.0256, 1.0569, 1.3444) [0.5709, 0.4291, 0.2687]\n", "PeriodicSite: O2- (0.4373, 1.4062, 4.3524) [0.4291, 0.5709, 0.7313]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "example_new_struct.add_oxidation_state_by_guess()\n", "example_new_struct" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now that the structure carries oxidation states, we should be able to obtain proper results from the function." 
] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{Element Cu: -1.5818928513834425,\n", " Element Ni: -1.7340300140072384,\n", " Element O: 1.6414884929438913}" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calc_elem_max_potential(example_new_struct, check_vesta=True)" ] } ], "metadata": { "kernelspec": { "display_name": "PyCharm (mit_model_code)", "language": "python", "name": "pycharm-43a0cb91" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }