{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Series\" data-toc-modified-id=\"Series-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Series</a></span></li><li><span><a href=\"#DataFrame\" data-toc-modified-id=\"DataFrame-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>DataFrame</a></span></li><li><span><a href=\"#Using-the-axis-parameter\" data-toc-modified-id=\"Using-the-axis-parameter-3\"><span class=\"toc-item-num\">3&nbsp;&nbsp;</span>Using the axis parameter</a></span></li><li><span><a href=\"#Essesntial-Functionality\" data-toc-modified-id=\"Essesntial-Functionality-4\"><span class=\"toc-item-num\">4&nbsp;&nbsp;</span>Essesntial Functionality</a></span><ul class=\"toc-item\"><li><span><a href=\"#Reindexing\" data-toc-modified-id=\"Reindexing-4.1\"><span class=\"toc-item-num\">4.1&nbsp;&nbsp;</span>Reindexing</a></span></li><li><span><a href=\"#Dropping-Entries-from-an-axis\" data-toc-modified-id=\"Dropping-Entries-from-an-axis-4.2\"><span class=\"toc-item-num\">4.2&nbsp;&nbsp;</span>Dropping Entries from an axis</a></span></li><li><span><a href=\"#Indexing,-Selection,-and-Filtering\" data-toc-modified-id=\"Indexing,-Selection,-and-Filtering-4.3\"><span class=\"toc-item-num\">4.3&nbsp;&nbsp;</span>Indexing, Selection, and Filtering</a></span><ul class=\"toc-item\"><li><span><a href=\"#indexing\" data-toc-modified-id=\"indexing-4.3.1\"><span class=\"toc-item-num\">4.3.1&nbsp;&nbsp;</span>indexing</a></span></li><li><span><a href=\"#setting\" data-toc-modified-id=\"setting-4.3.2\"><span class=\"toc-item-num\">4.3.2&nbsp;&nbsp;</span>setting</a></span></li><li><span><a href=\"#Selection-with-loc,-iloc\" data-toc-modified-id=\"Selection-with-loc,-iloc-4.3.3\"><span class=\"toc-item-num\">4.3.3&nbsp;&nbsp;</span>Selection with loc, iloc</a></span></li></ul></li><li><span><a href=\"#Arithmetic-and-Data-Alignment\" data-toc-modified-id=\"Arithmetic-and-Data-Alignment-4.4\"><span class=\"toc-item-num\">4.4&nbsp;&nbsp;</span>Arithmetic and Data Alignment</a></span></li><li><span><a href=\"#Function-Application-and-Mapping\" data-toc-modified-id=\"Function-Application-and-Mapping-4.5\"><span class=\"toc-item-num\">4.5&nbsp;&nbsp;</span>Function Application and Mapping</a></span></li><li><span><a href=\"#Sorting-and-Ranking\" data-toc-modified-id=\"Sorting-and-Ranking-4.6\"><span class=\"toc-item-num\">4.6&nbsp;&nbsp;</span>Sorting and Ranking</a></span><ul class=\"toc-item\"><li><span><a href=\"#Sorting\" data-toc-modified-id=\"Sorting-4.6.1\"><span class=\"toc-item-num\">4.6.1&nbsp;&nbsp;</span>Sorting</a></span></li><li><span><a href=\"#Ranking\" data-toc-modified-id=\"Ranking-4.6.2\"><span class=\"toc-item-num\">4.6.2&nbsp;&nbsp;</span>Ranking</a></span></li></ul></li><li><span><a href=\"#Axis-Indexes-with-Duplicate-Labels\" data-toc-modified-id=\"Axis-Indexes-with-Duplicate-Labels-4.7\"><span class=\"toc-item-num\">4.7&nbsp;&nbsp;</span>Axis Indexes with Duplicate Labels</a></span></li></ul></li><li><span><a href=\"#Summarizing-and-Computing-Descriptive-Statistics\" data-toc-modified-id=\"Summarizing-and-Computing-Descriptive-Statistics-5\"><span class=\"toc-item-num\">5&nbsp;&nbsp;</span>Summarizing and Computing Descriptive Statistics</a></span><ul class=\"toc-item\"><li><span><a href=\"#Correlation-and-Covariance\" data-toc-modified-id=\"Correlation-and-Covariance-5.1\"><span class=\"toc-item-num\">5.1&nbsp;&nbsp;</span>Correlation and Covariance</a></span></li><li><span><a href=\"#Unique-Values,-Value-Counts,-and-Membership\" data-toc-modified-id=\"Unique-Values,-Value-Counts,-and-Membership-5.2\"><span class=\"toc-item-num\">5.2&nbsp;&nbsp;</span>Unique Values, Value Counts, and Membership</a></span></li></ul></li></ul></div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:01.227037Z",
     "start_time": "2019-12-24T11:54:00.439377Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:01.232635Z",
     "start_time": "2019-12-24T11:54:01.227037Z"
    }
   },
   "outputs": [],
   "source": [
    "import builtins\n",
    "\n",
    "def print(*args, **kwargs):\n",
    "    builtins.print(*args, **kwargs, end='\\n\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-22T12:01:19.037222Z",
     "start_time": "2019-12-22T12:01:19.029951Z"
    }
   },
   "source": [
    "## Series\n",
    "\n",
    "A Series is a one-dimensional array-like object containing a sequence of values (of similar types to NumPy types) and an associated array of data labels, called its index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:02.268631Z",
     "start_time": "2019-12-24T11:54:01.233953Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    4\n",
      "1    7\n",
      "2   -5\n",
      "3    3\n",
      "dtype: int64\n",
      "\n",
      "RangeIndex(start=0, stop=4, step=1)\n",
      "\n",
      "[ 4  7 -5  3]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "obj = pd.Series([4, 7, -5, 3])\n",
    "print(obj) \n",
    "print(obj.index)\n",
    "print(obj.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:02.430806Z",
     "start_time": "2019-12-24T11:54:02.271623Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    4\n",
      "a    7\n",
      "c   -5\n",
      "b    3\n",
      "dtype: int64\n",
      "\n",
      "Index(['d', 'a', 'c', 'b'], dtype='object')\n",
      "\n"
     ]
    }
   ],
   "source": [
    "obj2 = pd.Series([4, 7, -5, 3], index=['d', 'a', 'c', 'b'])\n",
    "print(obj2)\n",
    "print(obj2.index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:02.643902Z",
     "start_time": "2019-12-24T11:54:02.432505Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2['a']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:02.812307Z",
     "start_time": "2019-12-24T11:54:02.645897Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "c   -5\n",
       "a    7\n",
       "d    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2[['c', 'a', 'd']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:02.962361Z",
     "start_time": "2019-12-24T11:54:02.814622Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ohio      35000\n",
      "Texas     71000\n",
      "Oregon    16000\n",
      "Utah       5000\n",
      "dtype: int64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
    "obj3 = pd.Series(sdata)\n",
    "print(obj3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.145871Z",
     "start_time": "2019-12-24T11:54:02.964783Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California        NaN\n",
      "Ohio          35000.0\n",
      "Oregon        16000.0\n",
      "Texas         71000.0\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "states = ['California', 'Ohio', 'Oregon', 'Texas']\n",
    "obj4 = pd.Series(sdata, index=states)\n",
    "print(obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.282293Z",
     "start_time": "2019-12-24T11:54:03.148390Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California     True\n",
      "Ohio          False\n",
      "Oregon        False\n",
      "Texas         False\n",
      "dtype: bool\n",
      "\n",
      "California    False\n",
      "Ohio           True\n",
      "Oregon         True\n",
      "Texas          True\n",
      "dtype: bool\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj4.isnull())\n",
    "print(obj4.notnull())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.477576Z",
     "start_time": "2019-12-24T11:54:03.282293Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ohio      35000\n",
      "Texas     71000\n",
      "Oregon    16000\n",
      "Utah       5000\n",
      "dtype: int64\n",
      "\n",
      "California        NaN\n",
      "Ohio          35000.0\n",
      "Oregon        16000.0\n",
      "Texas         71000.0\n",
      "dtype: float64\n",
      "\n",
      "California         NaN\n",
      "Ohio           70000.0\n",
      "Oregon         32000.0\n",
      "Texas         142000.0\n",
      "Utah               NaN\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj3)\n",
    "print(obj4)\n",
    "print(obj3 + obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.586203Z",
     "start_time": "2019-12-24T11:54:03.480359Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "state\n",
      "California        NaN\n",
      "Ohio          35000.0\n",
      "Oregon        16000.0\n",
      "Texas         71000.0\n",
      "Name: population, dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "obj4.name = 'population'\n",
    "obj4.index.name = 'state'\n",
    "print(obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.710465Z",
     "start_time": "2019-12-24T11:54:03.588187Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    4\n",
      "1    7\n",
      "2   -5\n",
      "3    3\n",
      "dtype: int64\n",
      "\n",
      "Bob      4\n",
      "Steve    7\n",
      "Jeff    -5\n",
      "Ryan     3\n",
      "dtype: int64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj)\n",
    "obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']\n",
    "print(obj)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DataFrame\n",
    "\n",
    "A DataFrame represents a rectangular table of data and contains an ordered collection of columns, each of which can be a different value type (numeric, string, boolean, etc.)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:03.884935Z",
     "start_time": "2019-12-24T11:54:03.713457Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    state  year  pop\n",
      "0    Ohio  2000  1.5\n",
      "1    Ohio  2001  1.7\n",
      "2    Ohio  2002  3.6\n",
      "3  Nevada  2001  2.4\n",
      "4  Nevada  2002  2.9\n",
      "5  Nevada  2003  3.2\n",
      "\n"
     ]
    }
   ],
   "source": [
    "data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],\n",
    " 'year': [2000, 2001, 2002, 2001, 2002, 2003],\n",
    " 'pop': [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}\n",
    "\n",
    "frame = pd.DataFrame(data)\n",
    "print(frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.041094Z",
     "start_time": "2019-12-24T11:54:03.886930Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>year</th>\n",
       "      <th>state</th>\n",
       "      <th>pop</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2000</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2001</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>1.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2002</td>\n",
       "      <td>Ohio</td>\n",
       "      <td>3.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2001</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2002</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>2.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2003</td>\n",
       "      <td>Nevada</td>\n",
       "      <td>3.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   year   state  pop\n",
       "0  2000    Ohio  1.5\n",
       "1  2001    Ohio  1.7\n",
       "2  2002    Ohio  3.6\n",
       "3  2001  Nevada  2.4\n",
       "4  2002  Nevada  2.9\n",
       "5  2003  Nevada  3.2"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(data, columns=['year', 'state', 'pop'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.244915Z",
     "start_time": "2019-12-24T11:54:04.043184Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       year   state  pop debt\n",
      "one    2000    Ohio  1.5  NaN\n",
      "two    2001    Ohio  1.7  NaN\n",
      "three  2002    Ohio  3.6  NaN\n",
      "four   2001  Nevada  2.4  NaN\n",
      "five   2002  Nevada  2.9  NaN\n",
      "six    2003  Nevada  3.2  NaN\n",
      "\n",
      "Index(['year', 'state', 'pop', 'debt'], dtype='object')\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# u pass a column that isn’t contained in the dict, \n",
    "# it will appear with missing values\n",
    "\n",
    "frame2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'],\n",
    "                     index=['one', 'two', 'three', 'four', 'five', 'six'])\n",
    "\n",
    "print(frame2)\n",
    "print(frame2.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.373163Z",
     "start_time": "2019-12-24T11:54:04.244915Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0      Ohio\n",
      "1      Ohio\n",
      "2      Ohio\n",
      "3    Nevada\n",
      "4    Nevada\n",
      "5    Nevada\n",
      "Name: state, dtype: object\n",
      "\n",
      "0    2000\n",
      "1    2001\n",
      "2    2002\n",
      "3    2001\n",
      "4    2002\n",
      "5    2003\n",
      "Name: year, dtype: int64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(frame['state'])\n",
    "print(frame.year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.502687Z",
     "start_time": "2019-12-24T11:54:04.374158Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "year     2002\n",
       "state    Ohio\n",
       "pop       3.6\n",
       "debt      NaN\n",
       "Name: three, dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows can also be retrieved by \n",
    "# position or name with the special loc attribute\n",
    "frame2.loc['three']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.699057Z",
     "start_time": "2019-12-24T11:54:04.503681Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       year   state  pop debt\n",
      "one    2000    Ohio  1.5  NaN\n",
      "two    2001    Ohio  1.7  NaN\n",
      "three  2002    Ohio  3.6  NaN\n",
      "four   2001  Nevada  2.4  NaN\n",
      "five   2002  Nevada  2.9  NaN\n",
      "six    2003  Nevada  3.2  NaN\n",
      "\n",
      "       year   state  pop  debt\n",
      "one    2000    Ohio  1.5  16.5\n",
      "two    2001    Ohio  1.7  16.5\n",
      "three  2002    Ohio  3.6  16.5\n",
      "four   2001  Nevada  2.4  16.5\n",
      "five   2002  Nevada  2.9  16.5\n",
      "six    2003  Nevada  3.2  16.5\n",
      "\n",
      "       year   state  pop  debt\n",
      "one    2000    Ohio  1.5   0.0\n",
      "two    2001    Ohio  1.7   1.0\n",
      "three  2002    Ohio  3.6   2.0\n",
      "four   2001  Nevada  2.4   3.0\n",
      "five   2002  Nevada  2.9   4.0\n",
      "six    2003  Nevada  3.2   5.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(frame2)\n",
    "frame2['debt'] = 16.5\n",
    "print(frame2)\n",
    "frame2['debt'] = np.arange(6.)\n",
    "print(frame2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.822895Z",
     "start_time": "2019-12-24T11:54:04.700056Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       year   state  pop  debt\n",
      "one    2000    Ohio  1.5   NaN\n",
      "two    2001    Ohio  1.7  -1.2\n",
      "three  2002    Ohio  3.6   NaN\n",
      "four   2001  Nevada  2.4  -1.5\n",
      "five   2002  Nevada  2.9  -1.7\n",
      "six    2003  Nevada  3.2   NaN\n",
      "\n"
     ]
    }
   ],
   "source": [
    "val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])\n",
    "frame2['debt'] = val\n",
    "print(frame2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:04.996306Z",
     "start_time": "2019-12-24T11:54:04.825542Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       year   state  pop  debt  eastern\n",
      "one    2000    Ohio  1.5   NaN     True\n",
      "two    2001    Ohio  1.7  -1.2     True\n",
      "three  2002    Ohio  3.6   NaN     True\n",
      "four   2001  Nevada  2.4  -1.5    False\n",
      "five   2002  Nevada  2.9  -1.7    False\n",
      "six    2003  Nevada  3.2   NaN    False\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Index(['year', 'state', 'pop', 'debt'], dtype='object')"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assigning a column that doesn’t exist will create a new column. \n",
    "# The del keyword will delete columns as with a dict.\n",
    "# As an example of del, I first add a new column of boolean \n",
    "# values where the state column equals 'Ohio':\n",
    "\n",
    "frame2['eastern'] = frame2['state'] == 'Ohio'\n",
    "print(frame2)\n",
    "\n",
    "del frame2['eastern']\n",
    "frame2.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:05.121544Z",
     "start_time": "2019-12-24T11:54:04.998624Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      Nevada  Ohio\n",
      "2001     2.4   1.7\n",
      "2002     2.9   3.6\n",
      "2000     NaN   1.5\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# another way is a nested dict of dicts\n",
    "pop = {'Nevada': {2001: 2.4, 2002: 2.9},\n",
    "       'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}\n",
    "\n",
    "# If the nested dict is passed to the DataFrame, \n",
    "# pandas will interpret the outer dict keys\n",
    "# as the columns and the inner keys as the row indices\n",
    "\n",
    "frame3 = pd.DataFrame(pop)\n",
    "print(frame3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:05.295926Z",
     "start_time": "2019-12-24T11:54:05.126331Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>2001</th>\n",
       "      <th>2002</th>\n",
       "      <th>2000</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Nevada</th>\n",
       "      <td>2.4</td>\n",
       "      <td>2.9</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>1.7</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        2001  2002  2000\n",
       "Nevada   2.4   2.9   NaN\n",
       "Ohio     1.7   3.6   1.5"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# transpose dataframe\n",
    "frame3.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:05.432228Z",
     "start_time": "2019-12-24T11:54:05.299058Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "state  Nevada  Ohio\n",
      "year               \n",
      "2001      2.4   1.7\n",
      "2002      2.9   3.6\n",
      "2000      NaN   1.5\n",
      "\n"
     ]
    }
   ],
   "source": [
    "frame3.index.name = 'year'; frame3.columns.name = 'state'\n",
    "print(frame3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using the axis parameter \n",
    "\n",
    "1) axis = 0 => row\n",
    "    * Move across/down the row axis\n",
    "    * Direction of operation is down\n",
    "\n",
    "2) axis = 1 => column\n",
    "    * Move along the column axis\n",
    "    * Direction of operation is sideways\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:06.582318Z",
     "start_time": "2019-12-24T11:54:05.434315Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0  Afghanistan              0                0              0   \n",
       "1      Albania             89              132             54   \n",
       "2      Algeria             25                0             14   \n",
       "3      Andorra            245              138            312   \n",
       "4       Angola            217               57             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol continent  \n",
       "0                           0.0      Asia  \n",
       "1                           4.9    Europe  \n",
       "2                           0.7    Africa  \n",
       "3                          12.4    Europe  \n",
       "4                           5.9    Africa  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drinks = pd.read_csv('http://bit.ly/drinksbycountry')\n",
    "drinks.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:06.610698Z",
     "start_time": "2019-12-24T11:54:06.587032Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0  Afghanistan              0                0              0   \n",
       "1      Albania             89              132             54   \n",
       "2      Algeria             25                0             14   \n",
       "3      Andorra            245              138            312   \n",
       "4       Angola            217               57             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol  \n",
       "0                           0.0  \n",
       "1                           4.9  \n",
       "2                           0.7  \n",
       "3                          12.4  \n",
       "4                           5.9  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dropping a column\n",
    "\n",
    "drinks.drop('continent', axis=1).head() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:07.987950Z",
     "start_time": "2019-12-24T11:54:06.613218Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>beer_servings</th>\n",
       "      <th>spirit_servings</th>\n",
       "      <th>wine_servings</th>\n",
       "      <th>total_litres_of_pure_alcohol</th>\n",
       "      <th>continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Asia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albania</td>\n",
       "      <td>89</td>\n",
       "      <td>132</td>\n",
       "      <td>54</td>\n",
       "      <td>4.9</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Andorra</td>\n",
       "      <td>245</td>\n",
       "      <td>138</td>\n",
       "      <td>312</td>\n",
       "      <td>12.4</td>\n",
       "      <td>Europe</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Angola</td>\n",
       "      <td>217</td>\n",
       "      <td>57</td>\n",
       "      <td>45</td>\n",
       "      <td>5.9</td>\n",
       "      <td>Africa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Antigua &amp; Barbuda</td>\n",
       "      <td>102</td>\n",
       "      <td>128</td>\n",
       "      <td>45</td>\n",
       "      <td>4.9</td>\n",
       "      <td>North America</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             country  beer_servings  spirit_servings  wine_servings  \\\n",
       "0        Afghanistan              0                0              0   \n",
       "1            Albania             89              132             54   \n",
       "3            Andorra            245              138            312   \n",
       "4             Angola            217               57             45   \n",
       "5  Antigua & Barbuda            102              128             45   \n",
       "\n",
       "   total_litres_of_pure_alcohol      continent  \n",
       "0                           0.0           Asia  \n",
       "1                           4.9         Europe  \n",
       "3                          12.4         Europe  \n",
       "4                           5.9         Africa  \n",
       "5                           4.9  North America  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dropping a row\n",
    "\n",
    "drinks.drop(2, axis=0).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.176714Z",
     "start_time": "2019-12-24T11:54:07.990944Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4,)\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "beer_servings                   106.160622\n",
       "spirit_servings                  80.994819\n",
       "wine_servings                    49.450777\n",
       "total_litres_of_pure_alcohol      4.717098\n",
       "dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# default axis=0\n",
    "# mean of each column\n",
    "\n",
    "print(drinks.mean().shape)\n",
    "drinks.mean() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.296361Z",
     "start_time": "2019-12-24T11:54:08.178873Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(193,)\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0        0.000\n",
       "1       69.975\n",
       "2        9.925\n",
       "3      176.850\n",
       "4       81.225\n",
       "        ...   \n",
       "188    110.925\n",
       "189     29.000\n",
       "190      1.525\n",
       "191     14.375\n",
       "192     22.675\n",
       "Length: 193, dtype: float64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# mean of each row\n",
    "\n",
    "print(drinks.mean(axis=1).shape)\n",
    "drinks.mean(axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Essesntial Functionality"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reindexing\n",
    "\n",
    "An important method on pandas objects is **reindex**, which means to create a new object with the data conformed to a new index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.467311Z",
     "start_time": "2019-12-24T11:54:08.296361Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    4.5\n",
      "b    7.2\n",
      "a   -5.3\n",
      "c    3.6\n",
      "dtype: float64\n",
      "\n",
      "a   -5.3\n",
      "b    7.2\n",
      "c    3.6\n",
      "d    4.5\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])\n",
    "print(obj)\n",
    "\n",
    "obj2 = obj.reindex(['a', 'b', 'c', 'd'])\n",
    "print(obj2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For ordered data like time series, it may be desirable to do some interpolation or filling of values when reindexing. The **method** option allows us to do this, using a\n",
    "method such as **ffill**, which forward-fills the values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.580511Z",
     "start_time": "2019-12-24T11:54:08.470651Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0      blue\n",
      "2    purple\n",
      "4    yellow\n",
      "dtype: object\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0      blue\n",
       "1      blue\n",
       "2    purple\n",
       "3    purple\n",
       "4    yellow\n",
       "5    yellow\n",
       "dtype: object"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])\n",
    "print(obj3)\n",
    "\n",
    "obj3.reindex(range(6), method='ffill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.694431Z",
     "start_time": "2019-12-24T11:54:08.582639Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Texas</th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>letters</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Ohio  Texas  California\n",
       "letters                         \n",
       "a           0      1           2\n",
       "c           3      4           5\n",
       "d           6      7           8"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'],\n",
    "                     columns=['Ohio', 'Texas', 'California'])\n",
    "frame.index.name = 'letters'\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.812642Z",
     "start_time": "2019-12-24T11:54:08.696663Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Ohio</th>\n",
       "      <th>Texas</th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>letters</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>3.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>6.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Ohio  Texas  California\n",
       "letters                         \n",
       "a         0.0    1.0         2.0\n",
       "b         NaN    NaN         NaN\n",
       "c         3.0    4.0         5.0\n",
       "d         6.0    7.0         8.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame2 = frame.reindex(['a', 'b', 'c', 'd'])\n",
    "frame2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:08.910495Z",
     "start_time": "2019-12-24T11:54:08.814770Z"
    },
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Texas</th>\n",
       "      <th>Utah</th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>letters</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Texas  Utah  California\n",
       "letters                         \n",
       "a            1   NaN           2\n",
       "c            4   NaN           5\n",
       "d            7   NaN           8"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The columns can be reindexed with the columns keyword:\n",
    "states = ['Texas', 'Utah', 'California']\n",
    "frame.reindex(columns=states)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.024940Z",
     "start_time": "2019-12-24T11:54:08.911493Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Texas</th>\n",
       "      <th>Utah</th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>letters</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>7.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Texas  Utah  California\n",
       "letters                         \n",
       "a          1.0   NaN         2.0\n",
       "b          NaN   NaN         NaN\n",
       "c          4.0   NaN         5.0\n",
       "d          7.0   NaN         8.0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.reindex(['a', 'b', 'c', 'd'], columns=states)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.141015Z",
     "start_time": "2019-12-24T11:54:09.024940Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Texas</th>\n",
       "      <th>Utah</th>\n",
       "      <th>California</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>letters</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Texas  Utah  California\n",
       "letters                         \n",
       "a            1     0           2\n",
       "b            0     0           0\n",
       "c            4     0           5\n",
       "d            7     0           8"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.reindex(['a', 'b', 'c', 'd'], columns=states, fill_value=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dropping Entries from an axis\n",
    "\n",
    "**drop** method will return a new object with the indicated value or values deleted from an axis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.250070Z",
     "start_time": "2019-12-24T11:54:09.141015Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    1.0\n",
       "c    2.0\n",
       "d    3.0\n",
       "e    4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.396400Z",
     "start_time": "2019-12-24T11:54:09.251603Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    1.0\n",
       "d    3.0\n",
       "e    4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_obj = obj.drop('c')\n",
    "new_obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.598954Z",
     "start_time": "2019-12-24T11:54:09.396400Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    1.0\n",
       "e    4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.drop(['d', 'c'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With DataFrame, index values can be deleted from either axis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.802608Z",
     "start_time": "2019-12-24T11:54:09.599952Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.DataFrame(np.arange(16).reshape((4, 4)), index=['Ohio', 'Colorado', 'Utah', 'New York'],\n",
    "                    columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:09.966588Z",
     "start_time": "2019-12-24T11:54:09.803605Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Calling drop with a sequence of labels will \n",
    "# drop values from the row labels (axis 0):\n",
    "data.drop(['Colorado', 'Ohio'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.096537Z",
     "start_time": "2019-12-24T11:54:09.966588Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  three\n",
       "Ohio        0      2\n",
       "Colorado    4      6\n",
       "Utah        8     10\n",
       "New York   12     14"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop values from columns: pass axis=1 or axis='columns'\n",
    "\n",
    "data.drop(['two', 'four'], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.243523Z",
     "start_time": "2019-12-24T11:54:10.097533Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0.0\n",
      "b    1.0\n",
      "c    2.0\n",
      "d    3.0\n",
      "e    4.0\n",
      "dtype: float64\n",
      "\n",
      "a    0.0\n",
      "b    1.0\n",
      "d    3.0\n",
      "e    4.0\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj)\n",
    "obj.drop('c', inplace=True)\n",
    "print(obj)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Indexing, Selection, and Filtering"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### indexing\n",
    "\n",
    "Series indexing (obj[...]) works analogously to NumPy array indexing, except you can use the Series’s index values instead of only integers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.362801Z",
     "start_time": "2019-12-24T11:54:10.243523Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    1.0\n",
       "c    2.0\n",
       "d    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.493177Z",
     "start_time": "2019-12-24T11:54:10.362801Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "\n",
      "1.0\n",
      "\n",
      "c    2.0\n",
      "d    3.0\n",
      "dtype: float64\n",
      "\n",
      "b    1.0\n",
      "a    0.0\n",
      "d    3.0\n",
      "dtype: float64\n",
      "\n",
      "b    1.0\n",
      "d    3.0\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj['b'])\n",
    "print(obj[1])\n",
    "print(obj[2:4])\n",
    "print(obj[['b', 'a', 'd']])\n",
    "print(obj[[1, 3]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.631638Z",
     "start_time": "2019-12-24T11:54:10.494174Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    0.0\n",
      "b    1.0\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(obj[obj < 2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Slicing with labels behaves differently than normal Python slicing in that the **end‐point is inclusive**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:10.937208Z",
     "start_time": "2019-12-24T11:54:10.634638Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    1.0\n",
       "c    2.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj['b': 'c']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Indexing into a DataFrame is for retrieving one or more columns either with a single value or sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.110423Z",
     "start_time": "2019-12-24T11:54:10.939206Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n",
    "                    index=['Ohio', 'Colorado', 'Utah', 'New York'],\n",
    "                    columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.335656Z",
     "start_time": "2019-12-24T11:54:11.111464Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ohio         1\n",
      "Colorado     5\n",
      "Utah         9\n",
      "New York    13\n",
      "Name: two, dtype: int32\n",
      "\n",
      "          three  four\n",
      "Ohio          2     3\n",
      "Colorado      6     7\n",
      "Utah         10    11\n",
      "New York     14    15\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(data['two'])\n",
    "print(data[['three', 'four']])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The row selection syntax **data[:2]** is provided as a convenience. Passing a single element or a list to the [ ] operator selects columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.486571Z",
     "start_time": "2019-12-24T11:54:11.335656Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[:2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.626605Z",
     "start_time": "2019-12-24T11:54:11.491623Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Colorado    4    5      6     7\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['three'] > 5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.731106Z",
     "start_time": "2019-12-24T11:54:11.626605Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            one    two  three   four\n",
       "Ohio       True   True   True   True\n",
       "Colorado   True  False  False  False\n",
       "Utah      False  False  False  False\n",
       "New York  False  False  False  False"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data < 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-22T16:14:52.753331Z",
     "start_time": "2019-12-22T16:14:52.745801Z"
    }
   },
   "source": [
    "#### setting\n",
    "Setting using these methods modifies the corresponding section of the Series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.854456Z",
     "start_time": "2019-12-24T11:54:11.733101Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0.0\n",
       "b    5.0\n",
       "c    5.0\n",
       "d    3.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj['b': 'c'] = 5\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:11.971222Z",
     "start_time": "2019-12-24T11:54:11.855652Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    1      2     3\n",
       "Colorado    4    5      6     7\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.DataFrame(np.arange(16).reshape((4, 4)),\n",
    "                    index=['Ohio', 'Colorado', 'Utah', 'New York'],\n",
    "                    columns=['one', 'two', 'three', 'four'])\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.124945Z",
     "start_time": "2019-12-24T11:54:11.972250Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three  four\n",
       "Ohio        0    0      0     0\n",
       "Colorado    0    5      6     7\n",
       "Utah        8    9     10    11\n",
       "New York   12   13     14    15"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data < 5] = 0\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Selection with loc, iloc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For DataFrame label-indexing on the rows, I introduce the special indexing operators loc and iloc. They enable you to select a subset of the rows and columns from a DataFrame with NumPy-like notation using either axis labels (loc) or integers (iloc)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.244168Z",
     "start_time": "2019-12-24T11:54:12.124945Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "two      5\n",
       "three    6\n",
       "Name: Colorado, dtype: int32"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  select a single row and multiple columns by label\n",
    "# loc[Row, col]\n",
    "data.loc['Colorado', ['two', 'three']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.359593Z",
     "start_time": "2019-12-24T11:54:12.244168Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "two      5\n",
       "three    6\n",
       "Name: Colorado, dtype: int32"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# with integers using iloc\n",
    "data.iloc[1, [1, 2]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.517066Z",
     "start_time": "2019-12-24T11:54:12.361868Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one       8\n",
      "two       9\n",
      "three    10\n",
      "four     11\n",
      "Name: Utah, dtype: int32\n",
      "\n",
      "          four  one  two\n",
      "Colorado     7    0    5\n",
      "Utah        11    8    9\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(data.iloc[2])\n",
    "print(data.iloc[[1, 2], [3, 0, 1]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.680750Z",
     "start_time": "2019-12-24T11:54:12.519063Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          one  two  three  four\n",
      "Ohio        0    0      0     0\n",
      "Colorado    0    5      6     7\n",
      "Utah        8    9     10    11\n",
      "New York   12   13     14    15\n",
      "\n",
      "Ohio        0\n",
      "Colorado    5\n",
      "Utah        9\n",
      "Name: two, dtype: int32\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(data)\n",
    "print(data.loc[:'Utah', 'two'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.840886Z",
     "start_time": "2019-12-24T11:54:12.682744Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>New York</th>\n",
       "      <td>12</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          one  two  three\n",
       "Colorado    0    5      6\n",
       "Utah        8    9     10\n",
       "New York   12   13     14"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.iloc[:, :3][data.three > 5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Arithmetic and Data Alignment"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An important pandas feature for some applications is the behavior of arithmetic\n",
    "between objects with different indexes. When you are adding together objects, if any index pairs are not the same, \n",
    "the respective index in the result will be the union of the index pairs. \n",
    "For users with database experience, this is similar to an automatic outer join on the index labels."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:12.950857Z",
     "start_time": "2019-12-24T11:54:12.841887Z"
    }
   },
   "outputs": [],
   "source": [
    "s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=['a', 'c', 'd', 'e'])\n",
    "s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=['a', 'c', 'e', 'f', 'g'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.066293Z",
     "start_time": "2019-12-24T11:54:12.950857Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a    7.3\n",
      "c   -2.5\n",
      "d    3.4\n",
      "e    1.5\n",
      "dtype: float64\n",
      "\n",
      "a   -2.1\n",
      "c    3.6\n",
      "e   -1.5\n",
      "f    4.0\n",
      "g    3.1\n",
      "dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(s1)\n",
    "print(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.203969Z",
     "start_time": "2019-12-24T11:54:13.068249Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    5.2\n",
       "c    1.1\n",
       "d    NaN\n",
       "e    0.0\n",
       "f    NaN\n",
       "g    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1 + s2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In the case of DataFrame, alignment is performed on both the rows and the columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.326232Z",
     "start_time": "2019-12-24T11:54:13.205001Z"
    }
   },
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list('bcd'),\n",
    "                   index=['Ohio', 'Texas', 'Colorado'])\n",
    "\n",
    "df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),\n",
    "                   index=['Utah', 'Ohio', 'Texas', 'Oregon'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.459283Z",
     "start_time": "2019-12-24T11:54:13.326232Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            b    c    d\n",
      "Ohio      0.0  1.0  2.0\n",
      "Texas     3.0  4.0  5.0\n",
      "Colorado  6.0  7.0  8.0\n",
      "\n",
      "          b     d     e\n",
      "Utah    0.0   1.0   2.0\n",
      "Ohio    3.0   4.0   5.0\n",
      "Texas   6.0   7.0   8.0\n",
      "Oregon  9.0  10.0  11.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df1)\n",
    "print(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.621405Z",
     "start_time": "2019-12-24T11:54:13.461210Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Colorado</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            b   c     d   e\n",
       "Colorado  NaN NaN   NaN NaN\n",
       "Ohio      3.0 NaN   6.0 NaN\n",
       "Oregon    NaN NaN   NaN NaN\n",
       "Texas     9.0 NaN  12.0 NaN\n",
       "Utah      NaN NaN   NaN NaN"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 + df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.778639Z",
     "start_time": "2019-12-24T11:54:13.623331Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   A\n",
      "0  1\n",
      "1  2\n",
      "\n",
      "   B\n",
      "0  3\n",
      "1  4\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df1 = pd.DataFrame({'A': [1, 2]})\n",
    "df2 = pd.DataFrame({'B': [3, 4]})\n",
    "print(df1)\n",
    "print(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:13.907441Z",
     "start_time": "2019-12-24T11:54:13.788272Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B\n",
       "0 NaN NaN\n",
       "1 NaN NaN"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 - df2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In arithmetic operations between differently indexed objects, you might want to fill\n",
    "with a special value, like 0, when an axis label is found in one object but not the other"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.021709Z",
     "start_time": "2019-12-24T11:54:13.915170Z"
    }
   },
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list(\"abcd\"))\n",
    "df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list('abcde'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.142549Z",
     "start_time": "2019-12-24T11:54:14.021709Z"
    }
   },
   "outputs": [],
   "source": [
    "df2.loc[1, 'b'] = np.nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.273758Z",
     "start_time": "2019-12-24T11:54:14.143900Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     a    b     c     d\n",
      "0  0.0  1.0   2.0   3.0\n",
      "1  4.0  5.0   6.0   7.0\n",
      "2  8.0  9.0  10.0  11.0\n",
      "\n",
      "      a     b     c     d     e\n",
      "0   0.0   1.0   2.0   3.0   4.0\n",
      "1   5.0   NaN   7.0   8.0   9.0\n",
      "2  10.0  11.0  12.0  13.0  14.0\n",
      "3  15.0  16.0  17.0  18.0  19.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df1)\n",
    "print(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.395478Z",
     "start_time": "2019-12-24T11:54:14.273758Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      a     b     c     d   e\n",
       "0   0.0   2.0   4.0   6.0 NaN\n",
       "1   9.0   NaN  13.0  15.0 NaN\n",
       "2  18.0  20.0  22.0  24.0 NaN\n",
       "3   NaN   NaN   NaN   NaN NaN"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Adding these together results in NA values in the locations that don’t overlap\n",
    "df1 + df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.522987Z",
     "start_time": "2019-12-24T11:54:14.397472Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      a     b     c     d     e\n",
       "0   0.0   2.0   4.0   6.0   4.0\n",
       "1   9.0   5.0  13.0  15.0   9.0\n",
       "2  18.0  20.0  22.0  24.0  14.0\n",
       "3  15.0  16.0  17.0  18.0  19.0"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Using the add method on df1,one pass df2 and an argument to fill_value\n",
    "df1.add(df2, fill_value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.645859Z",
     "start_time": "2019-12-24T11:54:14.524282Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>inf</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.250</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.142857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.125</td>\n",
       "      <td>0.111111</td>\n",
       "      <td>0.100000</td>\n",
       "      <td>0.090909</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a         b         c         d\n",
       "0    inf  1.000000  0.500000  0.333333\n",
       "1  0.250  0.200000  0.166667  0.142857\n",
       "2  0.125  0.111111  0.100000  0.090909"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1 / df1\n",
    "# or\n",
    "df1.rdiv(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:14.762982Z",
     "start_time": "2019-12-24T11:54:14.648478Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          b     d     e\n",
      "utah    0.0   1.0   2.0\n",
      "ohio    3.0   4.0   5.0\n",
      "texas   6.0   7.0   8.0\n",
      "oregon  9.0  10.0  11.0\n",
      "\n",
      "b    0.0\n",
      "d    1.0\n",
      "e    2.0\n",
      "Name: utah, dtype: float64\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# operations between DataFrame and Series\n",
    "frame = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'),\n",
    "                    index=(\"utah\", \"ohio\", \"texas\", \"oregon\"))\n",
    "series = frame.iloc[0]\n",
    "print(frame)\n",
    "print(series)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By default, arithmetic between DataFrame and Series matches the index of the Series\n",
    "on the DataFrame’s columns, broadcasting down the rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:15.272570Z",
     "start_time": "2019-12-24T11:54:14.764980Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>utah</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ohio</th>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>texas</th>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>oregon</th>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b    d    e\n",
       "utah    0.0  0.0  0.0\n",
       "ohio    3.0  3.0  3.0\n",
       "texas   6.0  6.0  6.0\n",
       "oregon  9.0  9.0  9.0"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame - series"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If an index value is not found in either the DataFrame’s columns or the Series’s index,\n",
    "the objects will be reindexed to form the union"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:15.785596Z",
     "start_time": "2019-12-24T11:54:15.274639Z"
    }
   },
   "outputs": [],
   "source": [
    "series2 = pd.Series(range(3), index=list('bef'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:15.975844Z",
     "start_time": "2019-12-24T11:54:15.789282Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "      <th>f</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>utah</th>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ohio</th>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>texas</th>\n",
       "      <td>6.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>oregon</th>\n",
       "      <td>9.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b   d     e   f\n",
       "utah    0.0 NaN   3.0 NaN\n",
       "ohio    3.0 NaN   6.0 NaN\n",
       "texas   6.0 NaN   9.0 NaN\n",
       "oregon  9.0 NaN  12.0 NaN"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame + series2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:16.113109Z",
     "start_time": "2019-12-24T11:54:15.978953Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "utah       1.0\n",
       "ohio       4.0\n",
       "texas      7.0\n",
       "oregon    10.0\n",
       "Name: d, dtype: float64"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# broadcasting over columns\n",
    "series3 = frame['d']\n",
    "series3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:16.261347Z",
     "start_time": "2019-12-24T11:54:16.115103Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>utah</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ohio</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>texas</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>oregon</th>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          b    d    e\n",
       "utah   -1.0  0.0  1.0\n",
       "ohio   -1.0  0.0  1.0\n",
       "texas  -1.0  0.0  1.0\n",
       "oregon -1.0  0.0  1.0"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.sub(series3, axis='index')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The axis number that you pass is the axis to match on. In this case we mean to match\n",
    "on the DataFrame’s row index (axis='index' or axis=0) and broadcast across."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Function Application and Mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:16.578884Z",
     "start_time": "2019-12-24T11:54:16.263428Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>-1.846691</td>\n",
       "      <td>-0.622582</td>\n",
       "      <td>-1.542595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0.818697</td>\n",
       "      <td>0.730596</td>\n",
       "      <td>0.458859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>0.612735</td>\n",
       "      <td>0.593502</td>\n",
       "      <td>-0.631133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>-0.067950</td>\n",
       "      <td>-1.023214</td>\n",
       "      <td>0.115138</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               b         d         e\n",
       "Utah   -1.846691 -0.622582 -1.542595\n",
       "Ohio    0.818697  0.730596  0.458859\n",
       "Texas   0.612735  0.593502 -0.631133\n",
       "Oregon -0.067950 -1.023214  0.115138"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),\n",
    "                    index=['Utah', 'Ohio', 'Texas', 'Oregon'])\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:16.763901Z",
     "start_time": "2019-12-24T11:54:16.581885Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>1.846691</td>\n",
       "      <td>0.622582</td>\n",
       "      <td>1.542595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0.818697</td>\n",
       "      <td>0.730596</td>\n",
       "      <td>0.458859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>0.612735</td>\n",
       "      <td>0.593502</td>\n",
       "      <td>0.631133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>0.067950</td>\n",
       "      <td>1.023214</td>\n",
       "      <td>0.115138</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               b         d         e\n",
       "Utah    1.846691  0.622582  1.542595\n",
       "Ohio    0.818697  0.730596  0.458859\n",
       "Texas   0.612735  0.593502  0.631133\n",
       "Oregon  0.067950  1.023214  0.115138"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.abs(frame)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:16.884947Z",
     "start_time": "2019-12-24T11:54:16.766103Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    2.665389\n",
       "d    1.753811\n",
       "e    2.001455\n",
       "dtype: float64"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Another frequent operation is applying a function on \n",
    "# one-dimensional arrays to each column or row. \n",
    "# DataFrame’s apply method does exactly this\n",
    "\n",
    "f = lambda x: x.max() - x.min()\n",
    "frame.apply(f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here the function f, which computes the difference between the maximum and minimum of a Series, is **invoked once on each column** in frame. The result is a Series having the columns of frame as its index."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.003427Z",
     "start_time": "2019-12-24T11:54:16.887837Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Utah      1.224110\n",
       "Ohio      0.359838\n",
       "Texas     1.243868\n",
       "Oregon    1.138353\n",
       "dtype: float64"
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# If you pass axis='columns' to apply, \n",
    "# the function will be invoked once per row\n",
    "\n",
    "frame.apply(f, axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.152859Z",
     "start_time": "2019-12-24T11:54:17.003427Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.846691</td>\n",
       "      <td>-1.023214</td>\n",
       "      <td>-1.542595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>0.818697</td>\n",
       "      <td>0.730596</td>\n",
       "      <td>0.458859</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            b         d         e\n",
       "min -1.846691 -1.023214 -1.542595\n",
       "max  0.818697  0.730596  0.458859"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# returning a Series with multiple values\n",
    "\n",
    "def f(x):\n",
    "    return pd.Series([x.min(), x.max()], index=['min', 'max'])\n",
    "\n",
    "frame.apply(f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Element-wise Python functions can be used, too. Suppose you wanted to compute a\n",
    "formatted string from each floating-point value in frame. You can do this with \n",
    "**applymap**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.261634Z",
     "start_time": "2019-12-24T11:54:17.155897Z"
    }
   },
   "outputs": [],
   "source": [
    "format = lambda x: f'{x:.2f}'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.398915Z",
     "start_time": "2019-12-24T11:54:17.263630Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "               b         d         e\n",
      "Utah   -1.846691 -0.622582 -1.542595\n",
      "Ohio    0.818697  0.730596  0.458859\n",
      "Texas   0.612735  0.593502 -0.631133\n",
      "Oregon -0.067950 -1.023214  0.115138\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>d</th>\n",
       "      <th>e</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Utah</th>\n",
       "      <td>-1.85</td>\n",
       "      <td>-0.62</td>\n",
       "      <td>-1.54</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ohio</th>\n",
       "      <td>0.82</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0.46</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Texas</th>\n",
       "      <td>0.61</td>\n",
       "      <td>0.59</td>\n",
       "      <td>-0.63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Oregon</th>\n",
       "      <td>-0.07</td>\n",
       "      <td>-1.02</td>\n",
       "      <td>0.12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            b      d      e\n",
       "Utah    -1.85  -0.62  -1.54\n",
       "Ohio     0.82   0.73   0.46\n",
       "Texas    0.61   0.59  -0.63\n",
       "Oregon  -0.07  -1.02   0.12"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "print(frame)\n",
    "frame.applymap(format)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The reason for the name applymap is that Series has a **map** method for applying an\n",
    "element-wise function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.518935Z",
     "start_time": "2019-12-24T11:54:17.400935Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Utah      -1.54\n",
       "Ohio       0.46\n",
       "Texas     -0.63\n",
       "Oregon     0.12\n",
       "Name: e, dtype: object"
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame['e'].map(format)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sorting and Ranking"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Sorting\n",
    "\n",
    "Sorting a dataset by some criterion is another important built-in operation. To sort\n",
    "lexicographically by row or column index, use the **sort_index** method, which returns\n",
    "a new, sorted object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.662604Z",
     "start_time": "2019-12-24T11:54:17.522098Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "b    0\n",
       "d    1\n",
       "a    2\n",
       "c    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series(range(4), index=['b', 'd', 'a', 'c'])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.793647Z",
     "start_time": "2019-12-24T11:54:17.664561Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    2\n",
       "b    0\n",
       "c    3\n",
       "d    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:17.984142Z",
     "start_time": "2019-12-24T11:54:17.795974Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>d</th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       d  a  b  c\n",
       "one    4  5  6  7\n",
       "three  0  1  2  3"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame(np.arange(8).reshape((2, 4)), index=['three', 'one'],\n",
    "                     columns=['d', 'a', 'b', 'c'])\n",
    "frame.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.202334Z",
     "start_time": "2019-12-24T11:54:17.985140Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>d</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a  b  c  d\n",
       "three  1  2  3  0\n",
       "one    5  6  7  4"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.sort_index(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.322544Z",
     "start_time": "2019-12-24T11:54:18.205743Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>d</th>\n",
       "      <th>c</th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>three</th>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>4</td>\n",
       "      <td>7</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       d  c  b  a\n",
       "three  0  3  2  1\n",
       "one    4  7  6  5"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.sort_index(axis=1, ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To sort a Series by its values, use its **sort_values** method.\n",
    "Any missing values are sorted to the end of the Series by default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.444610Z",
     "start_time": "2019-12-24T11:54:18.323543Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0   -4\n",
       "3    2\n",
       "2    3\n",
       "1    7\n",
       "dtype: int64"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series([-4, 7, 3, 2])\n",
    "obj.sort_values()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.561066Z",
     "start_time": "2019-12-24T11:54:18.448348Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4   -3.0\n",
       "5    2.0\n",
       "0    4.0\n",
       "2    7.0\n",
       "1    NaN\n",
       "3    NaN\n",
       "dtype: float64"
      ]
     },
     "execution_count": 94,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])\n",
    "obj.sort_values()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When sorting a DataFrame, you can use the data in one or more columns as the sort\n",
    "keys. To do so, pass one or more column names to the by option of sort_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.674098Z",
     "start_time": "2019-12-24T11:54:18.562063Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   b  a\n",
       "0  4  0\n",
       "1  7  1\n",
       "2 -3  0\n",
       "3  2  1"
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.782844Z",
     "start_time": "2019-12-24T11:54:18.675129Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   b  a\n",
       "2 -3  0\n",
       "3  2  1\n",
       "0  4  0\n",
       "1  7  1"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.sort_values(by='b')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:18.885242Z",
     "start_time": "2019-12-24T11:54:18.783879Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   b  a\n",
       "2 -3  0\n",
       "0  4  0\n",
       "3  2  1\n",
       "1  7  1"
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.sort_values(by=['a', 'b'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Ranking\n",
    "\n",
    "*Ranking* assigns ranks from one through the number of valid data points in an array.\n",
    "The rank methods for Series and DataFrame are the place to look; by default rank\n",
    "breaks ties by assigning each group the mean rank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.058017Z",
     "start_time": "2019-12-24T11:54:18.887690Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    6.5\n",
       "1    1.0\n",
       "2    6.5\n",
       "3    4.5\n",
       "4    3.0\n",
       "5    2.0\n",
       "6    4.5\n",
       "dtype: float64"
      ]
     },
     "execution_count": 98,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series([7, -5, 7, 4, 2, 0, 4])\n",
    "obj.rank()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.209466Z",
     "start_time": "2019-12-24T11:54:19.059022Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    6.0\n",
       "1    1.0\n",
       "2    7.0\n",
       "3    4.0\n",
       "4    3.0\n",
       "5    2.0\n",
       "6    5.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# assigning rank according to the order they're observed first in the data\n",
    "obj.rank(method='first')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.352734Z",
     "start_time": "2019-12-24T11:54:19.211753Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    2.0\n",
       "1    7.0\n",
       "2    2.0\n",
       "3    4.0\n",
       "4    5.0\n",
       "5    6.0\n",
       "6    4.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.rank(ascending=False, method='max')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.499605Z",
     "start_time": "2019-12-24T11:54:19.352734Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>4.3</td>\n",
       "      <td>0</td>\n",
       "      <td>-2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7.0</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>8.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2.0</td>\n",
       "      <td>1</td>\n",
       "      <td>-2.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     b  a    c\n",
       "0  4.3  0 -2.0\n",
       "1  7.0  1  5.0\n",
       "2 -3.0  0  8.0\n",
       "3  2.0  1 -2.5"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame = pd.DataFrame({'b': [4.3, 7, -3, 2], 'a': [0, 1, 0, 1], 'c': [-2, 5, 8, -2.5]})\n",
    "frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.644123Z",
     "start_time": "2019-12-24T11:54:19.502600Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>b</th>\n",
       "      <th>a</th>\n",
       "      <th>c</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     b    a    c\n",
       "0  3.0  2.0  1.0\n",
       "1  3.0  1.0  2.0\n",
       "2  1.0  2.0  3.0\n",
       "3  3.0  2.0  1.0"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "frame.rank(axis='columns')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Axis Indexes with Duplicate Labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.825688Z",
     "start_time": "2019-12-24T11:54:19.646865Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "a    1\n",
       "b    2\n",
       "b    3\n",
       "c    4\n",
       "dtype: int64"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj = pd.Series(range(5), index=['a', 'a', 'b', 'b', 'c'])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:19.978965Z",
     "start_time": "2019-12-24T11:54:19.828121Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.index.is_unique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.122066Z",
     "start_time": "2019-12-24T11:54:19.980367Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    0\n",
       "a    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.293573Z",
     "start_time": "2019-12-24T11:54:20.124045Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>-2.054623</td>\n",
       "      <td>0.654964</td>\n",
       "      <td>-0.431453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.654782</td>\n",
       "      <td>0.776429</td>\n",
       "      <td>1.175684</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>-0.485586</td>\n",
       "      <td>-0.960232</td>\n",
       "      <td>-1.368606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>0.205797</td>\n",
       "      <td>0.266294</td>\n",
       "      <td>2.007870</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "a -2.054623  0.654964 -0.431453\n",
       "a  0.654782  0.776429  1.175684\n",
       "b -0.485586 -0.960232 -1.368606\n",
       "b  0.205797  0.266294  2.007870"
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(np.random.randn(4, 3), index=['a', 'a', 'b', 'b'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.444163Z",
     "start_time": "2019-12-24T11:54:20.295569Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>-2.054623</td>\n",
       "      <td>0.654964</td>\n",
       "      <td>-0.431453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.654782</td>\n",
       "      <td>0.776429</td>\n",
       "      <td>1.175684</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2\n",
       "a -2.054623  0.654964 -0.431453\n",
       "a  0.654782  0.776429  1.175684"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.loc['a']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Summarizing and Computing Descriptive Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.549520Z",
     "start_time": "2019-12-24T11:54:20.446595Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.40</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>7.10</td>\n",
       "      <td>-4.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>0.75</td>\n",
       "      <td>-1.43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    one   two\n",
       "a  1.40   NaN\n",
       "b  7.10 -4.50\n",
       "c   NaN   NaN\n",
       "d  0.75 -1.43"
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan],\n",
    "                   [0.75, -1.43]],\n",
    "                  index=['a', 'b', 'c', 'd'],\n",
    "                  columns=['one', 'two'])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.684771Z",
     "start_time": "2019-12-24T11:54:20.549520Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "one    9.25\n",
       "two   -5.93\n",
       "dtype: float64"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.818099Z",
     "start_time": "2019-12-24T11:54:20.687422Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a    1.40\n",
       "b    2.60\n",
       "c    0.00\n",
       "d   -0.68\n",
       "dtype: float64"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sum(axis='columns')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:20.936482Z",
     "start_time": "2019-12-24T11:54:20.818099Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "a     NaN\n",
       "b    2.60\n",
       "c     NaN\n",
       "d   -0.68\n",
       "dtype: float64"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sum(axis='columns', skipna=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some methods, like idxmin and idxmax, return indirect statistics like the index value\n",
    "where the minimum or maximum values are attained"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:21.074783Z",
     "start_time": "2019-12-24T11:54:20.937480Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "one    b\n",
      "two    d\n",
      "dtype: object\n",
      "\n",
      "one    d\n",
      "two    b\n",
      "dtype: object\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df.idxmax())\n",
    "print(df.idxmin())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:21.194122Z",
     "start_time": "2019-12-24T11:54:21.076618Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.40</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>8.50</td>\n",
       "      <td>-4.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>9.25</td>\n",
       "      <td>-5.93</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    one   two\n",
       "a  1.40   NaN\n",
       "b  8.50 -4.50\n",
       "c   NaN   NaN\n",
       "d  9.25 -5.93"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# accmulation\n",
    "df.cumsum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-22T20:42:48.976671Z",
     "start_time": "2019-12-22T20:42:48.961382Z"
    }
   },
   "source": [
    "Another type of method is neither a reduction nor an accumulation. **describe** is one\n",
    "such example, producing multiple summary statistics in one shot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:21.344016Z",
     "start_time": "2019-12-24T11:54:21.195119Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>3.083333</td>\n",
       "      <td>-2.965000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.493685</td>\n",
       "      <td>2.170818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.750000</td>\n",
       "      <td>-4.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.075000</td>\n",
       "      <td>-3.732500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.400000</td>\n",
       "      <td>-2.965000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.250000</td>\n",
       "      <td>-2.197500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>7.100000</td>\n",
       "      <td>-1.430000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            one       two\n",
       "count  3.000000  2.000000\n",
       "mean   3.083333 -2.965000\n",
       "std    3.493685  2.170818\n",
       "min    0.750000 -4.500000\n",
       "25%    1.075000 -3.732500\n",
       "50%    1.400000 -2.965000\n",
       "75%    4.250000 -2.197500\n",
       "max    7.100000 -1.430000"
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:21.511393Z",
     "start_time": "2019-12-24T11:54:21.344016Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count     16\n",
       "unique     3\n",
       "top        a\n",
       "freq       8\n",
       "dtype: object"
      ]
     },
     "execution_count": 115,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# On non-numeric data, describe produces alternative summary statistics:\n",
    "\n",
    "obj = pd.Series(['a', 'a', 'b', 'c'] * 4)\n",
    "obj.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:21.653404Z",
     "start_time": "2019-12-24T11:54:21.515120Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1.40</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>7.10</td>\n",
       "      <td>-4.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>d</th>\n",
       "      <td>0.75</td>\n",
       "      <td>-1.43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    one   two\n",
       "a  1.40   NaN\n",
       "b  7.10 -4.50\n",
       "c   NaN   NaN\n",
       "d  0.75 -1.43"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-22T20:47:42.840846Z",
     "start_time": "2019-12-22T20:47:42.833552Z"
    }
   },
   "source": [
    "### Correlation and Covariance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:23.076066Z",
     "start_time": "2019-12-24T11:54:21.655517Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas_datareader as  web"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:27.953329Z",
     "start_time": "2019-12-24T11:54:23.079097Z"
    }
   },
   "outputs": [],
   "source": [
    "all_data = {ticker: web.get_data_yahoo(ticker) for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:28.701569Z",
     "start_time": "2019-12-24T11:54:27.953329Z"
    }
   },
   "outputs": [],
   "source": [
    "price = pd.DataFrame({ticker: data['Adj Close']\n",
    "                      for ticker, data in all_data.items()})\n",
    "volume = pd.DataFrame({ticker: data['Volume']\n",
    "                       for ticker, data in all_data.items()})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:28.944129Z",
     "start_time": "2019-12-24T11:54:28.701569Z"
    }
   },
   "outputs": [],
   "source": [
    "returns = price.pct_change()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:29.202785Z",
     "start_time": "2019-12-24T11:54:28.944129Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>IBM</th>\n",
       "      <th>MSFT</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019-12-17</th>\n",
       "      <td>0.001965</td>\n",
       "      <td>0.000671</td>\n",
       "      <td>-0.005401</td>\n",
       "      <td>-0.004445</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-18</th>\n",
       "      <td>-0.002389</td>\n",
       "      <td>0.001416</td>\n",
       "      <td>-0.002069</td>\n",
       "      <td>-0.001845</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-19</th>\n",
       "      <td>0.001001</td>\n",
       "      <td>0.001116</td>\n",
       "      <td>0.008681</td>\n",
       "      <td>0.002528</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-20</th>\n",
       "      <td>-0.002071</td>\n",
       "      <td>0.007655</td>\n",
       "      <td>0.010918</td>\n",
       "      <td>-0.004757</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-12-23</th>\n",
       "      <td>0.016318</td>\n",
       "      <td>-0.000295</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000556</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                AAPL       IBM      MSFT      GOOG\n",
       "Date                                              \n",
       "2019-12-17  0.001965  0.000671 -0.005401 -0.004445\n",
       "2019-12-18 -0.002389  0.001416 -0.002069 -0.001845\n",
       "2019-12-19  0.001001  0.001116  0.008681  0.002528\n",
       "2019-12-20 -0.002071  0.007655  0.010918 -0.004757\n",
       "2019-12-23  0.016318 -0.000295  0.000000 -0.000556"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "returns.tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The **corr** method of Series computes the correlation of the overlapping, non-NA,\n",
    "aligned-by-index values in two Series. Relatedly, **cov** computes the covariance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.201205Z",
     "start_time": "2019-12-24T11:54:29.206950Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "9.227138328874263e-05\n",
      "\n",
      "0.48490365831130916\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(returns['MSFT'].cov(returns['IBM']))\n",
    "print(returns['MSFT'].corr(returns['IBM']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.217210Z",
     "start_time": "2019-12-24T11:54:30.204759Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.48490365831130916"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "returns.MSFT.corr(returns.IBM)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.398635Z",
     "start_time": "2019-12-24T11:54:30.219543Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>IBM</th>\n",
       "      <th>MSFT</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.399340</td>\n",
       "      <td>0.573224</td>\n",
       "      <td>0.521823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>0.399340</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.484904</td>\n",
       "      <td>0.410386</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MSFT</th>\n",
       "      <td>0.573224</td>\n",
       "      <td>0.484904</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.658704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GOOG</th>\n",
       "      <td>0.521823</td>\n",
       "      <td>0.410386</td>\n",
       "      <td>0.658704</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          AAPL       IBM      MSFT      GOOG\n",
       "AAPL  1.000000  0.399340  0.573224  0.521823\n",
       "IBM   0.399340  1.000000  0.484904  0.410386\n",
       "MSFT  0.573224  0.484904  1.000000  0.658704\n",
       "GOOG  0.521823  0.410386  0.658704  1.000000"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "returns.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.569188Z",
     "start_time": "2019-12-24T11:54:30.400959Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>IBM</th>\n",
       "      <th>MSFT</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>0.000245</td>\n",
       "      <td>0.000081</td>\n",
       "      <td>0.000132</td>\n",
       "      <td>0.000124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IBM</th>\n",
       "      <td>0.000081</td>\n",
       "      <td>0.000168</td>\n",
       "      <td>0.000092</td>\n",
       "      <td>0.000080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MSFT</th>\n",
       "      <td>0.000132</td>\n",
       "      <td>0.000092</td>\n",
       "      <td>0.000216</td>\n",
       "      <td>0.000146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GOOG</th>\n",
       "      <td>0.000124</td>\n",
       "      <td>0.000080</td>\n",
       "      <td>0.000146</td>\n",
       "      <td>0.000229</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          AAPL       IBM      MSFT      GOOG\n",
       "AAPL  0.000245  0.000081  0.000132  0.000124\n",
       "IBM   0.000081  0.000168  0.000092  0.000080\n",
       "MSFT  0.000132  0.000092  0.000216  0.000146\n",
       "GOOG  0.000124  0.000080  0.000146  0.000229"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "returns.cov()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.727892Z",
     "start_time": "2019-12-24T11:54:30.572979Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AAPL    0.399340\n",
       "IBM     1.000000\n",
       "MSFT    0.484904\n",
       "GOOG    0.410386\n",
       "dtype: float64"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# corrwith method - compute pairwise correlations\n",
    "# between a DataFrame’s columns or rows with another Series oDataFrame’sme\n",
    "returns.corrwith(returns.IBM)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Unique Values, Value Counts, and Membership\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.844787Z",
     "start_time": "2019-12-24T11:54:30.727892Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['c', 'a', 'd', 'b'], dtype=object)"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# unique - gives you an array of the unique values in a Series\n",
    "\n",
    "obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])\n",
    "obj.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:30.977262Z",
     "start_time": "2019-12-24T11:54:30.846938Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "c    3\n",
       "a    3\n",
       "b    2\n",
       "d    1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# value_counts - computes a Series containing value frequencies\n",
    "\n",
    "pd.value_counts(obj.values, sort=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:31.112737Z",
     "start_time": "2019-12-24T11:54:30.978817Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    c\n",
       "1    a\n",
       "2    d\n",
       "3    a\n",
       "4    a\n",
       "5    b\n",
       "6    b\n",
       "7    c\n",
       "8    c\n",
       "dtype: object"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# isin - performs a vectorized set membership check and can be useful in filtering a\n",
    "# dataset down to a subset of values in a Series or column in a DataFrame\n",
    "\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:31.299318Z",
     "start_time": "2019-12-24T11:54:31.114761Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     True\n",
       "1    False\n",
       "2    False\n",
       "3    False\n",
       "4    False\n",
       "5     True\n",
       "6     True\n",
       "7     True\n",
       "8     True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mask = obj.isin(['b', 'c'])\n",
    "mask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:31.517280Z",
     "start_time": "2019-12-24T11:54:31.300318Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    c\n",
       "5    b\n",
       "6    b\n",
       "7    c\n",
       "8    c\n",
       "dtype: object"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:31.801449Z",
     "start_time": "2019-12-24T11:54:31.520294Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Qu1</th>\n",
       "      <th>Qu2</th>\n",
       "      <th>Qu3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Qu1  Qu2  Qu3\n",
       "0    1    2    1\n",
       "1    3    3    5\n",
       "2    4    1    2\n",
       "3    3    2    4\n",
       "4    4    3    4"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.DataFrame({'Qu1': [1, 3, 4, 3, 4],\n",
    "                     'Qu2': [2, 3, 1, 2, 3],\n",
    "                     'Qu3': [1, 5, 2, 4, 4]})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-12-24T11:54:32.315990Z",
     "start_time": "2019-12-24T11:54:31.804884Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4    2\n",
      "3    2\n",
      "1    1\n",
      "Name: Qu1, dtype: int64\n",
      "\n",
      "3    2\n",
      "2    2\n",
      "1    1\n",
      "Name: Qu2, dtype: int64\n",
      "\n",
      "4    2\n",
      "5    1\n",
      "2    1\n",
      "1    1\n",
      "Name: Qu3, dtype: int64\n",
      "\n",
      "   Qu1  Qu2  Qu3\n",
      "1  1.0  1.0  1.0\n",
      "2  0.0  2.0  1.0\n",
      "3  2.0  2.0  0.0\n",
      "4  2.0  0.0  2.0\n",
      "5  0.0  0.0  1.0\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(pd.value_counts(data.Qu1))\n",
    "print(pd.value_counts(data.Qu2))\n",
    "print(pd.value_counts(data.Qu3))\n",
    "print(data.apply(pd.value_counts).fillna(0))\n",
    "# index - distinct values in the df\n",
    "# column values - how many times the distinct value occur in that column"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "202.837px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}