{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# pandas Dataframe - Basic Operativity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div id='toc-container'><script type='text/javascript'>\n",
       "$(function() {\n",
       "    function regenTOC(){\n",
       "        element = $(\"#toc-container\");\n",
       "\n",
       "\tvar toc = document.createElement(\"div\");\n",
       "\t$(toc).attr(\"class\", \"table-of-contents\");\n",
       "\n",
       "\tvar curLevel = 0;\n",
       "\tvar containerStack = [toc];\n",
       "\tvar levelOfTag = {\"h2\": 1, \"h3\": 2, \"h4\": 3, \"h5\": 4};\n",
       "\n",
       "\tfunction pushLevel() {\n",
       "            var list = document.createElement(\"ul\");\n",
       "            containerStack.push(list);\n",
       "            curLevel++;\n",
       "\t}\n",
       "\t\n",
       "\tfunction popLevel() {\n",
       "            var lastContainer = containerStack.pop();\n",
       "            $(lastContainer).appendTo(containerStack[containerStack.length - 1]);\n",
       "            curLevel--;\n",
       "\t}\n",
       "\t\n",
       "\t$(\".text_cell_render :header\").each(function (i, elem) {\n",
       "            var level = levelOfTag[ elem.tagName.toLowerCase() ];\n",
       "\n",
       "            if (level === undefined)\n",
       "\t\treturn;\n",
       "\n",
       "            while (curLevel < level)\n",
       "\t\tpushLevel();\n",
       "            while (curLevel > level)\n",
       "\t\tpopLevel();\n",
       "            \n",
       "            var listItem = document.createElement(\"li\");\n",
       "            var link = document.createElement(\"a\");\n",
       "            $(link)\n",
       "\t\t.text($(elem).contents().first().text()) // Remove the pilcrow sign\n",
       "\t\t.attr(\"href\", \"#\" + $(elem).attr(\"id\"))\n",
       "\t\t.appendTo(listItem);\n",
       "            $(listItem).appendTo(containerStack[containerStack.length - 1]);\n",
       "\t});\n",
       "\t\n",
       "\twhile (curLevel > 0)\n",
       "            popLevel();\n",
       "\n",
       "        $(\"<a class='btn-update' href='#'>Update</a>\")\n",
       "          .click(regenTOC).prependTo(toc);\n",
       "\n",
       "\t$(toc).prepend(\"<div class='title'>Contents</div>\")\n",
       "          .wrap(\"<div class='toc-headings'/>\");\n",
       "\n",
       "        $(element).empty();\n",
       "        $(element).append(toc);\n",
       "    }\n",
       "\n",
       "    if (typeof(IPython) !== 'undefined')\n",
       "        $([IPython.events]).on('notebook_loaded.Notebook', regenTOC);\n",
       "    regenTOC();\n",
       "});\n",
       "\n",
       "</script></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import addutils.toc ; addutils.toc.js(ipy_notebook=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".text_cell_render @font-face {\n",
       "    font-family: \"Computer Modern\";\n",
       "    src: url('http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf');\n",
       "}\n",
       "\n",
       "div.cell {\n",
       "    width: 900px;\n",
       "    margin-left: 0% !important;\n",
       "    margin-right: 0%;\n",
       "}\n",
       "\n",
       "code {\n",
       "    font-size:10pt;\n",
       "}\n",
       "\n",
       ".text_cell_render  h1 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:28pt;\n",
       "}\n",
       ".text_cell_render h2 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:24pt;\n",
       "}\n",
       ".text_cell_render h3 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:20pt;\n",
       "}\n",
       ".text_cell_render h4 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:18pt;\n",
       "    margin-top:12px;\n",
       "    margin-bottom: 3px;\n",
       "}\n",
       "\n",
       ".text_cell_render h5 {\n",
       "    font-weight: 300;\n",
       "    font-size: 11pt;\n",
       "    color: rgb( 48, 48, 48 );\n",
       "    font-style: italic;\n",
       "    margin-bottom: .5em;\n",
       "    margin-top: 0.5em;\n",
       "    display: block;\n",
       "}\n",
       "\n",
       ".text_cell_render ul {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 90, 90, 90 );\n",
       "    font-size:11pt;\n",
       "    line-height: 185%;\n",
       "}\n",
       "\n",
       ".text_cell_render yp {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 90, 90, 90 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render strong {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 30, 30, 30 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render a:link {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render a:visited {\n",
       "    color:rgb( 10, 88, 126 );\n",
       "}\n",
       "\n",
       ".text_cell_render {\n",
       "    font-family: Helvetica, Courier, Computer Modern, \"Helvetica Neue\", Arial, Geneva, sans-serif;\n",
       "    color: rgb( 84, 84, 84 );\n",
       "    font-size:11pt;\n",
       "    line-height: 125%;\n",
       "    font-size: 100%;\n",
       "    width:800px;\n",
       "}\n",
       "\n",
       ".CodeMirror {\n",
       "    font-family: Courier, \"Source Code Pro\", source-code-pro,Consolas, monospace;\n",
       "}\n",
       "\n",
       ".warning {\n",
       "    color: rgb( 240, 20, 20 );\n",
       "}\n",
       "\n",
       "/* Pandas tables */\n",
       "/*\n",
       ".rendered_html td {\n",
       "    text-align: right;\n",
       "}\n",
       "*/\n",
       "\n",
       "table.dataframe td {\n",
       "    text-align: right;\n",
       "}\n",
       "\n",
       ".output .table-of-contents {\n",
       "    border: 1px #cecece solid;\n",
       "    background-color: #fafafa;\n",
       "    padding-top: 10px;\n",
       "    padding-bottom: 5px;\n",
       "    padding-right: 15px;\n",
       "    padding-left: 0px;\n",
       "    margin-bottom: 20px;\n",
       "    display: inline-block;\n",
       "    position: relative;\n",
       "}\n",
       "\n",
       ".output .table-of-contents ul {\n",
       "    list-style-type: none;\n",
       "    padding-left: 20px;\n",
       "}\n",
       "\n",
       ".output .table-of-contents .title {\n",
       "    font-weight: bold;\n",
       "    font-height: 11pt;\n",
       "    padding-left: 20px; /* looks better if it's the same to the <ul> */\n",
       "}\n",
       "\n",
       ".output .table-of-contents .btn-update {\n",
       "    position: absolute;\n",
       "    float: right;\n",
       "    right: 11px;\n",
       "    top: 4px;\n",
       "    font-size: 9pt;\n",
       "}\n",
       "\n",
       "</style>\n",
       "<script>\n",
       "    MathJax.Hub.Config({\n",
       "                        TeX: {\n",
       "                           extensions: [\"AMSmath.js\"]\n",
       "                           },\n",
       "                displayAlign: 'center', // Change this to 'center' to center equations.\n",
       "                \"HTML-CSS\": {\n",
       "                    styles: {'.MathJax_Display': {\"margin\": 4}}\n",
       "                }\n",
       "        });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from addutils import css_notebook\n",
    "css_notebook()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "See [pandas documentation](<http://pandas.pydata.org/pandas-docs/stable/>) for more information and examples. Run the code at the end of the Notebook to generate the example files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".text_cell_render @font-face {\n",
       "    font-family: \"Computer Modern\";\n",
       "    src: url('http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf');\n",
       "}\n",
       "\n",
       "div.cell {\n",
       "    width: 900px;\n",
       "    margin-left: 0% !important;\n",
       "    margin-right: 0%;\n",
       "}\n",
       "\n",
       "code {\n",
       "    font-size:10pt;\n",
       "}\n",
       "\n",
       ".text_cell_render  h1 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:28pt;\n",
       "}\n",
       ".text_cell_render h2 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:24pt;\n",
       "}\n",
       ".text_cell_render h3 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:20pt;\n",
       "}\n",
       ".text_cell_render h4 {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:18pt;\n",
       "    margin-top:12px;\n",
       "    margin-bottom: 3px;\n",
       "}\n",
       "\n",
       ".text_cell_render h5 {\n",
       "    font-weight: 300;\n",
       "    font-size: 11pt;\n",
       "    color: rgb( 48, 48, 48 );\n",
       "    font-style: italic;\n",
       "    margin-bottom: .5em;\n",
       "    margin-top: 0.5em;\n",
       "    display: block;\n",
       "}\n",
       "\n",
       ".text_cell_render ul {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 90, 90, 90 );\n",
       "    font-size:11pt;\n",
       "    line-height: 185%;\n",
       "}\n",
       "\n",
       ".text_cell_render yp {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 90, 90, 90 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render strong {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 30, 30, 30 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render a:link {\n",
       "    font-family: Tahoma, sans-serif;\n",
       "    color: rgb( 10, 88, 126 );\n",
       "    font-size:11pt;\n",
       "}\n",
       "\n",
       ".text_cell_render a:visited {\n",
       "    color:rgb( 10, 88, 126 );\n",
       "}\n",
       "\n",
       ".text_cell_render {\n",
       "    font-family: Helvetica, Courier, Computer Modern, \"Helvetica Neue\", Arial, Geneva, sans-serif;\n",
       "    color: rgb( 84, 84, 84 );\n",
       "    font-size:11pt;\n",
       "    line-height: 125%;\n",
       "    font-size: 100%;\n",
       "    width:800px;\n",
       "}\n",
       "\n",
       ".CodeMirror {\n",
       "    font-family: Courier, \"Source Code Pro\", source-code-pro,Consolas, monospace;\n",
       "}\n",
       "\n",
       ".warning {\n",
       "    color: rgb( 240, 20, 20 );\n",
       "}\n",
       "\n",
       "/* Pandas tables */\n",
       "/*\n",
       ".rendered_html td {\n",
       "    text-align: right;\n",
       "}\n",
       "*/\n",
       "\n",
       "table.dataframe td {\n",
       "    text-align: right;\n",
       "}\n",
       "\n",
       ".output .table-of-contents {\n",
       "    border: 1px #cecece solid;\n",
       "    background-color: #fafafa;\n",
       "    padding-top: 10px;\n",
       "    padding-bottom: 5px;\n",
       "    padding-right: 15px;\n",
       "    padding-left: 0px;\n",
       "    margin-bottom: 20px;\n",
       "    display: inline-block;\n",
       "    position: relative;\n",
       "}\n",
       "\n",
       ".output .table-of-contents ul {\n",
       "    list-style-type: none;\n",
       "    padding-left: 20px;\n",
       "}\n",
       "\n",
       ".output .table-of-contents .title {\n",
       "    font-weight: bold;\n",
       "    font-height: 11pt;\n",
       "    padding-left: 20px; /* looks better if it's the same to the <ul> */\n",
       "}\n",
       "\n",
       ".output .table-of-contents .btn-update {\n",
       "    position: absolute;\n",
       "    float: right;\n",
       "    right: 11px;\n",
       "    top: 4px;\n",
       "    font-size: 9pt;\n",
       "}\n",
       "\n",
       "</style>\n",
       "<script>\n",
       "    MathJax.Hub.Config({\n",
       "                        TeX: {\n",
       "                           extensions: [\"AMSmath.js\"]\n",
       "                           },\n",
       "                displayAlign: 'center', // Change this to 'center' to center equations.\n",
       "                \"HTML-CSS\": {\n",
       "                    styles: {'.MathJax_Display': {\"margin\": 4}}\n",
       "                }\n",
       "        });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from numpy import NaN\n",
    "from addutils import side_by_side2\n",
    "from addutils import css_notebook\n",
    "from addutils import read_txt\n",
    "from IPython.core.display import HTML\n",
    "from faker import Factory\n",
    "css_notebook()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1 File I/O and DataFrame Generation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pandas reads and format data from many different file formats: `txt, csv, web, xls, mat`. In this case we use `read_csv` to read two textual data files.\n",
    "\n",
    "First have a look to the file in its original form:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Date,AAPL,GOOG,JNJ,XOM\n",
      "\n",
      "2015-09-21,115.209999,635.440002,93.129997,73.389999\n",
      "\n",
      "2015-09-22,113.400002,622.690002,93.239998,72.739998\n",
      "\n",
      "2015-09-23,114.32,622.359985,92.989998,72.300003\n",
      "\n",
      "2015-09-24,115.0,625.799988,92.480003,72.730003\n",
      "\n"
     ]
    }
   ],
   "source": [
    "read_txt('temp/p01_prices.txt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1 Create DataFrames with read_csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This file can be read and formatted at the same time using `read_csv`. Lets read the two files `p01_prices.txt` and `p01_volumes.txt`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.209999</td>\n",
       "      <td>635.440002</td>\n",
       "      <td>93.129997</td>\n",
       "      <td>73.389999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.400002</td>\n",
       "      <td>622.690002</td>\n",
       "      <td>93.239998</td>\n",
       "      <td>72.739998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.320000</td>\n",
       "      <td>622.359985</td>\n",
       "      <td>92.989998</td>\n",
       "      <td>72.300003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.000000</td>\n",
       "      <td>625.799988</td>\n",
       "      <td>92.480003</td>\n",
       "      <td>72.730003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.709999</td>\n",
       "      <td>611.969971</td>\n",
       "      <td>91.000000</td>\n",
       "      <td>73.230003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.440002</td>\n",
       "      <td>594.890015</td>\n",
       "      <td>91.370003</td>\n",
       "      <td>72.599998</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>46554300</td>\n",
       "      <td>6971400</td>\n",
       "      <td>10585500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>49809000</td>\n",
       "      <td>10607800</td>\n",
       "      <td>14104200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>35645700</td>\n",
       "      <td>5597300</td>\n",
       "      <td>13777500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>49810600</td>\n",
       "      <td>7178400</td>\n",
       "      <td>14283800</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prices = pd.read_csv('temp/p01_prices.txt', index_col=0, parse_dates=[0])\n",
    "volumes = pd.read_csv('temp/p01_volumes.txt', index_col=0, parse_dates=[0])\n",
    "\n",
    "HTML(side_by_side2(prices, volumes))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Both \"prices\" and \"volumes\" datasets are 2D DataFrame objects:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "pandas.core.frame.DataFrame"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(prices)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2 Create DataFrames from Python Dictionaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "fakeIT = Factory.create('it_IT')\n",
    "data = {'Name' : [fakeIT.name() for i in range(5)],\n",
    "        'Company' : [fakeIT.company() for i in range(5)],\n",
    "        'City' : [fakeIT.city() for i in range(5)]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Company</th>\n",
       "      <th>City</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ruth Milani</td>\n",
       "      <td>Basile-Gentile SPA</td>\n",
       "      <td>Quarto Elda</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Albino Marchetti</td>\n",
       "      <td>De luca SPA</td>\n",
       "      <td>Donati calabro</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Vienna Cattaneo</td>\n",
       "      <td>Ferretti, Coppola e Colombo SPA</td>\n",
       "      <td>Gioacchino sardo</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Elga Vitale</td>\n",
       "      <td>Bianchi, Battaglia e Fontana e figli</td>\n",
       "      <td>San Cecco umbro</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Diana Greco</td>\n",
       "      <td>Lombardi s.r.l.</td>\n",
       "      <td>Borgo Sarita</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Name                               Company              City\n",
       "0       Ruth Milani                    Basile-Gentile SPA       Quarto Elda\n",
       "1  Albino Marchetti                           De luca SPA    Donati calabro\n",
       "2   Vienna Cattaneo       Ferretti, Coppola e Colombo SPA  Gioacchino sardo\n",
       "3       Elga Vitale  Bianchi, Battaglia e Fontana e figli   San Cecco umbro\n",
       "4       Diana Greco                       Lombardi s.r.l.      Borgo Sarita"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(data, columns = ['Name','Company','City'])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3 Create DataFrames from Items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Company</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Dott. Fatima Carbone</td>\n",
       "      <td>Caputo s.r.l.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Sig. Alighiero Sorrentino</td>\n",
       "      <td>De rosa Group</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sig. Pablo Monti</td>\n",
       "      <td>Cattaneo SPA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Demian Palmieri</td>\n",
       "      <td>Mazza-Martinelli SPA</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Sig.ra Loretta Martino</td>\n",
       "      <td>Conte-Sartori Group</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        Name               Company\n",
       "0       Dott. Fatima Carbone         Caputo s.r.l.\n",
       "1  Sig. Alighiero Sorrentino         De rosa Group\n",
       "2           Sig. Pablo Monti          Cattaneo SPA\n",
       "3            Demian Palmieri  Mazza-Martinelli SPA\n",
       "4     Sig.ra Loretta Martino   Conte-Sartori Group"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame.from_items([('Name', [fakeIT.name() for i in range(5)]),\n",
    "                              ('Company', [fakeIT.company() for i in range(5)])])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.4 Create DataFrames fron Numpy Arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ONE</th>\n",
       "      <th>TWO</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ONE  TWO\n",
       "a    2    3\n",
       "b    5    6"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame(np.array([[2,5],[3,6]]).T, index=list('ab'), columns=['ONE','TWO'])\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.5 DataFrames can be converted in Numpy Arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2, 3],\n",
       "       [5, 6]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.asarray(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.6 DataFrames, Series and Panels"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In Pandas there are 3 main data structure types (\"data container\" objects):  \n",
    "\n",
    "* **Series:**  for one-dimensional data \n",
    "* **DataFrames:**  for bi-dimensional data (matrices) \n",
    "* **Panels:** for 3D or nD data  \n",
    "\n",
    "For simplicity, in this course we will describe only pandas Series and DataFrames."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2 Automatic Data Alignment"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As you can see the dates has been interpreted correctly and used as row index. Notice that the rows in the two datafiles are misaligned, this is not a problem in pandas because the *Automatic Data Alignment* feature: an operation involving the two datasets will simply use `NaN` for the undefined (misaligned) values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>5.363521e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.492465e+08</td>\n",
       "      <td>7.768698e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>5.648341e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.890713e+08</td>\n",
       "      <td>1.025939e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>4.075016e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.204929e+08</td>\n",
       "      <td>9.961133e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>5.728219e+09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.638585e+08</td>\n",
       "      <td>1.038861e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    AAPL  GOOG           JNJ           XOM\n",
       "Date                                                      \n",
       "2015-09-21  5.363521e+09   NaN  6.492465e+08  7.768698e+08\n",
       "2015-09-22  5.648341e+09   NaN  9.890713e+08  1.025939e+09\n",
       "2015-09-23  4.075016e+09   NaN  5.204929e+08  9.961133e+08\n",
       "2015-09-24  5.728219e+09   NaN  6.638585e+08  1.038861e+09\n",
       "2015-09-25           NaN   NaN           NaN           NaN\n",
       "2015-09-28           NaN   NaN           NaN           NaN"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prices*volumes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Which can be better formatted to a \"2 decimal places float number\" with comma as thousands separator (see <a href=\"#options\">Package Options</a>):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>5,363,520,856.4</td>\n",
       "      <td>-</td>\n",
       "      <td>649,246,461.1</td>\n",
       "      <td>776,869,834.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>5,648,340,699.6</td>\n",
       "      <td>-</td>\n",
       "      <td>989,071,250.8</td>\n",
       "      <td>1,025,939,479.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>4,075,016,424.0</td>\n",
       "      <td>-</td>\n",
       "      <td>520,492,915.8</td>\n",
       "      <td>996,113,291.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>5,728,219,000.0</td>\n",
       "      <td>-</td>\n",
       "      <td>663,858,453.5</td>\n",
       "      <td>1,038,860,816.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "      <td>-</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      AAPL GOOG           JNJ             XOM\n",
       "Date                                                         \n",
       "2015-09-21 5,363,520,856.4    - 649,246,461.1   776,869,834.4\n",
       "2015-09-22 5,648,340,699.6    - 989,071,250.8 1,025,939,479.8\n",
       "2015-09-23 4,075,016,424.0    - 520,492,915.8   996,113,291.3\n",
       "2015-09-24 5,728,219,000.0    - 663,858,453.5 1,038,860,816.9\n",
       "2015-09-25               -    -             -               -\n",
       "2015-09-28               -    -             -               -"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.float_format', lambda x: '{:,.1f}'.format(x))   # formatting\n",
    "(prices*volumes).replace('nan', '-')    # replacing NaN"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example: calculate the volume-weighted average price"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AAPL   114.5\n",
       "JNJ     93.0\n",
       "XOM     72.8\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "vwap = (prices*volumes).sum()/volumes.sum()\n",
    "vwap.dropna()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3 Indexing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.1 Label-Based Indexing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`.loc` is strictly label based, will raise KeyError when the items are not found, allowed inputs are:\n",
    "\n",
    "* A single label\n",
    "* A list or array of labels [’a’, ’b’, ’c’]\n",
    "* A slice object with labels [’a’:’f’] (note that contrary to usual python slices, both the start and the stop are included!)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.2</td>\n",
       "      <td>635.4</td>\n",
       "      <td>93.1</td>\n",
       "      <td>73.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "      <td>93.2</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "      <td>93.0</td>\n",
       "      <td>72.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "      <td>92.5</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>73.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.4</td>\n",
       "      <td>594.9</td>\n",
       "      <td>91.4</td>\n",
       "      <td>72.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "HTML(side_by_side2(prices, prices.loc['2012-11-21':'2012-11-27',['AAPL', 'GOOG']]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Columns can be selected without specifying the index:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.2</td>\n",
       "      <td>635.4</td>\n",
       "      <td>93.1</td>\n",
       "      <td>73.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "      <td>93.2</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "      <td>93.0</td>\n",
       "      <td>72.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "      <td>92.5</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>73.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.4</td>\n",
       "      <td>594.9</td>\n",
       "      <td>91.4</td>\n",
       "      <td>72.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.2</td>\n",
       "      <td>635.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.4</td>\n",
       "      <td>594.9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "HTML(side_by_side2(prices, prices[['AAPL', 'GOOG']]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.2 Position-Based Indexing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`.iloc` is strictly position based, will raise KeyError when the items are out of bounds:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.2</td>\n",
       "      <td>635.4</td>\n",
       "      <td>93.1</td>\n",
       "      <td>73.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "      <td>93.2</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "      <td>93.0</td>\n",
       "      <td>72.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "      <td>92.5</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>73.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.4</td>\n",
       "      <td>594.9</td>\n",
       "      <td>91.4</td>\n",
       "      <td>72.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "HTML(side_by_side2(prices, prices.iloc[1:5,[0, 1]]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3.3 Mixed Indexing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.3 Advanced Indexing - .ix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`.ix` will always **try first** to resolve **labeled** index (like `.loc`), **then** it will fall back on **potitional** indexing (like `.loc`)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Rows can be indexed using the ix method. Try by yourself:  \n",
    "\n",
    "    prices.ix[1:4,0:2]                                # Position-based Indexing  \n",
    "    prices.ix[:'2012-11-23']                          # Label-based Indexing on index (rows)\n",
    "    prices.ix[:,[2,2,1]]                              # Duplicated values on columns \n",
    "    prices.ix[::2]                                    # One value every two rows\n",
    "    prices.ix[::-1]                                   # Reverse rows\n",
    "    prices.ix[prices['AAPL'] > 380]                   # Boolean indexing on index\n",
    "    prices.ix[:,[len(c)<4 for c in prices.columns]]   # Boolean indexing on columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>93.1</td>\n",
       "      <td>73.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>93.2</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>93.0</td>\n",
       "      <td>72.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>92.5</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>91.0</td>\n",
       "      <td>73.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>91.4</td>\n",
       "      <td>72.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            JNJ  XOM\n",
       "Date                \n",
       "2015-09-21 93.1 73.4\n",
       "2015-09-22 93.2 72.7\n",
       "2015-09-23 93.0 72.3\n",
       "2015-09-24 92.5 72.7\n",
       "2015-09-25 91.0 73.2\n",
       "2015-09-28 91.4 72.6"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prices.ix[:,[len(c)<4 for c in prices.columns]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4 DataFrame Basic Operations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.1 Reindex/Reorder rows and columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>W</th>\n",
       "      <th>K</th>\n",
       "      <th>T</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>6.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>12.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>z</th>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = np.array([[2,5,8,11],[3,6,9,12]])\n",
    "d1 = pd.DataFrame(data.T, index=list('abce'), columns=['K','W'])\n",
    "HTML(side_by_side2(d1, pd.DataFrame(d1, index=list('baez'), columns=['W','K','T'])) )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.2 Calculate new columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    K   W  Z      B\n",
       "a   2   3  1  False\n",
       "b   5   6  1   True\n",
       "c   8   9  1   True\n",
       "e  11  12  1   True"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1['Z'] = d1['W']-d1['K']\n",
    "d1['B'] = d1['W']>4\n",
    "d1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.3 Deleting rows and columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>SUM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1['SUM'] = d1.sum(axis=1)\n",
    "HTML(side_by_side2(d1, d1.drop(['b', 'c'], axis=0), d1.drop(['Z', 'B'], axis=1)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.4 Inserting colums in a specific position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>Exp(W)</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>20.1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>403.4</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>8,103.1</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>162,754.8</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d2 = d1.copy()  # .copy() method is needed to create a new object.\n",
    "d2.insert(1, 'Exp(W)', np.exp(d1['W']))\n",
    "HTML(side_by_side2(d1, d2, space=10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example: Indexing rows to create a new column with empty values, then use the Forward Fill Padding to fill the gaps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "      <th>part</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    K   W  Z      B  SUM  part\n",
       "a   2   3  1  False  6.0   2.0\n",
       "b   5   6  1   True 13.0   5.0\n",
       "c   8   9  1   True 19.0   5.0\n",
       "e  11  12  1   True 25.0   5.0"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1['part'] = d1['K'].ix[:2]\n",
    "d1['part'] = d1['part'].fillna(method='ffill')\n",
    "d1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.5 Check if a value or a list of given values are contained in a specific column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "      <th>part</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# TODO: Upgrade side_by_side2 to include series\n",
    "HTML(side_by_side2(d1, d1['K'].isin([3, 8])))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.6 Rename columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>K</th>\n",
       "      <th>W</th>\n",
       "      <th>Z</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "      <th>part</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div><div style=\"float:left;padding-right: 0.6em;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ONE</th>\n",
       "      <th>TWO</th>\n",
       "      <th>THREE</th>\n",
       "      <th>B</th>\n",
       "      <th>SUM</th>\n",
       "      <th>part</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>False</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>b</th>\n",
       "      <td>5</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>c</th>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>19.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>e</th>\n",
       "      <td>11</td>\n",
       "      <td>12</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "HTML(side_by_side2(d1, d1.rename(columns={'K':'ONE','W':'TWO','Z':'THREE'})))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.7 Iterate efficiently through rows"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`iterrows` returns an iterator yielding each index value along with a Series containing the data in each row:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "a ** 2 - 3 - 1 - False - 6.0 - 2.0\n",
      "b ** 5 - 6 - 1 - True - 13.0 - 5.0\n",
      "c ** 8 - 9 - 1 - True - 19.0 - 5.0\n",
      "e ** 11 - 12 - 1 - True - 25.0 - 5.0\n"
     ]
    }
   ],
   "source": [
    "for row_index, row in d1.iterrows():\n",
    "    print(row_index, '**', ' - '.join([str(item) for item in row]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`itertuples` returns an iterator yielding a tuple for each row in the DataFrame. The first element of the tuple is the row’s corresponding index value, while the remaining elements are the row values:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('a', 2, 3, 1, False, 6.0, 2.0)\n",
      "('b', 5, 6, 1, True, 13.0, 5.0)\n",
      "('c', 8, 9, 1, True, 19.0, 5.0)\n",
      "('e', 11, 12, 1, True, 25.0, 5.0)\n"
     ]
    }
   ],
   "source": [
    "for t in d1.itertuples():\n",
    "    print(t)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5 Duplicated Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.1 Find duplicated data in columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>a dup</th>\n",
       "      <th>a+b dup</th>\n",
       "      <th>a+b dup - take last</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>x</td>\n",
       "      <td>-0.3</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>one</td>\n",
       "      <td>y</td>\n",
       "      <td>-0.1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>two</td>\n",
       "      <td>y</td>\n",
       "      <td>1.2</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>three</td>\n",
       "      <td>x</td>\n",
       "      <td>-0.5</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>two</td>\n",
       "      <td>y</td>\n",
       "      <td>1.3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a  b    c  a dup a+b dup a+b dup - take last\n",
       "0    one  x -0.3  False   False               False\n",
       "1    one  y -0.1   True   False               False\n",
       "2    two  y  1.2  False   False                True\n",
       "3  three  x -0.5  False   False               False\n",
       "4    two  y  1.3   True    True               False"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d3 = pd.read_csv('temp/p01_d2.csv', index_col=0)\n",
    "d3['a dup'] = d3.duplicated(['a'])\n",
    "d3['a+b dup'] = d3.duplicated(['a', 'b'])\n",
    "d3['a+b dup - take last'] = d3.duplicated(['a', 'b'], keep='last')\n",
    "d3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.2 Remove Duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>a</th>\n",
       "      <th>b</th>\n",
       "      <th>c</th>\n",
       "      <th>a dup</th>\n",
       "      <th>a+b dup</th>\n",
       "      <th>a+b dup - take last</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>one</td>\n",
       "      <td>x</td>\n",
       "      <td>-0.3</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>one</td>\n",
       "      <td>y</td>\n",
       "      <td>-0.1</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>three</td>\n",
       "      <td>x</td>\n",
       "      <td>-0.5</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>two</td>\n",
       "      <td>y</td>\n",
       "      <td>1.3</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       a  b    c  a dup a+b dup a+b dup - take last\n",
       "0    one  x -0.3  False   False               False\n",
       "1    one  y -0.1   True   False               False\n",
       "3  three  x -0.5  False   False               False\n",
       "4    two  y  1.3   True    True               False"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d3.drop_duplicates(['a', 'b'],keep='last')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6 Working with Large Arrays"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 6.1 Control the DataFrame memory occupation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's start by generating a DataFrame from a Numpy Array. We'll see than there is no memory overhead on DataFrame Values:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rows x Cols x 8:                      80000\n",
      "np Array Memory Occupation:           80000\n",
      "Dataframe Values Memory Occupation:   80000\n",
      "Dataframe Index Memory Occupation:    800\n",
      "Dataframe Columns Memory Occupation:  800\n"
     ]
    }
   ],
   "source": [
    "rows, cols = 100, 100\n",
    "np_array = np.array(np.random.randn(rows, cols), dtype=np.float64)\n",
    "d4 = pd.DataFrame(np_array)\n",
    "print ('Rows x Cols x 8:                     ', rows*cols*8)\n",
    "print ('np Array Memory Occupation:          ', np_array.nbytes)\n",
    "print ('Dataframe Values Memory Occupation:  ', d4.values.nbytes)\n",
    "print ('Dataframe Index Memory Occupation:   ', d4.index.nbytes)\n",
    "print ('Dataframe Columns Memory Occupation: ', d4.columns.nbytes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To reduce the memory occupation it's possible to change the value's dtype:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataframe Values Memory Occupation:   20000\n"
     ]
    }
   ],
   "source": [
    "d4 = d4.astype(dtype=np.float16)\n",
    "print ('Dataframe Values Memory Occupation:  ', d4.values.nbytes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the data is sparse the Dataframe can be sparsified as well to save further resources with the `to_sparse()` method:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataframe Values Memory Occupation:   20000\n",
      "Dataframe Values Memory Occupation:   80000\n"
     ]
    }
   ],
   "source": [
    "d4.ix[2:,4:] = np.nan\n",
    "print ('Dataframe Values Memory Occupation:  ', d4.values.nbytes)\n",
    "d4 = d4.to_sparse()\n",
    "print ('Dataframe Values Memory Occupation:  ', d4.values.nbytes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this case rows and colums are np.int64 arrays:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,\n",
       "            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
       "            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,\n",
       "            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,\n",
       "            68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,\n",
       "            85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],\n",
       "           dtype='int64')"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d4.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 6.2 Explore large arrays"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Working with large arrays: in Excel is difficult to explore arrays with thousands of lines and columns. Explore the pandas capabilities with the following code. The first line visualize the firts two lines, while the second actually load the whole file. Try to do the same in Excel for comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "d3 = pd.read_csv('example_data/p01_d3.csv.gz', compression='gzip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Istat - Comune - Provincia - Regione - Prefisso - CAP - CodFisco - Abitanti - Link - "
     ]
    }
   ],
   "source": [
    "for col in d3.columns:         \n",
    "    print (col, end=' - ')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try by yourself:  \n",
    "\n",
    "    d3.head()\n",
    "    d3[d3.columns[:3]].head()\n",
    "    d3[d3.columns[-4:-1]].tail()\n",
    "    d3.ix[1000:1010, :7]\n",
    "    d3.ix[:, 'Abitanti'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CAP</th>\n",
       "      <th>CodFisco</th>\n",
       "      <th>Abitanti</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8087</th>\n",
       "      <td>33020</td>\n",
       "      <td>M200</td>\n",
       "      <td>607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8088</th>\n",
       "      <td>13848</td>\n",
       "      <td>M201</td>\n",
       "      <td>1152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8089</th>\n",
       "      <td>87040</td>\n",
       "      <td>M202</td>\n",
       "      <td>2413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8090</th>\n",
       "      <td>83030</td>\n",
       "      <td>M203</td>\n",
       "      <td>1232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8091</th>\n",
       "      <td>89867</td>\n",
       "      <td>M204</td>\n",
       "      <td>2055</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        CAP CodFisco  Abitanti\n",
       "8087  33020     M200       607\n",
       "8088  13848     M201      1152\n",
       "8089  87040     M202      2413\n",
       "8090  83030     M203      1232\n",
       "8091  89867     M204      2055"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d3[d3.columns[-4:-1]].tail()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7 Column pct_change and shift"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It is possibile to add multiple new columns to a `DataFrame`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>five</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>4.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>7.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>16.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>16.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>56.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>65.0</td>\n",
       "      <td>82.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>72.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>88.0</td>\n",
       "      <td>91.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four  five\n",
       "0  3.0  8.0    3.0   1.0   nan\n",
       "1  5.0 16.0    6.0   2.0   nan\n",
       "2  7.0 28.0    nan   4.0   nan\n",
       "3 10.0 37.0    nan   7.0   nan\n",
       "4 13.0 45.0   15.0  11.0  16.0\n",
       "5 16.0 57.0   18.0  16.0  19.0\n",
       "6 56.0 69.0    nan  65.0  82.0\n",
       "7 72.0 90.0    nan  88.0  91.0"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = np.array([[3, 5, 7, 10, 13, 16, 56, 72],\n",
    "                 [8, 16, 28, 37, 45, 57, 69, 90],\n",
    "                 [3, 6, NaN, NaN, 15, 18, NaN, NaN],\n",
    "                 [1, 2, 4, 7, 11, 16, 65, 88],\n",
    "                 [NaN, NaN, NaN, NaN, 16, 19, 82, 91]])\n",
    "d4 = pd.DataFrame(data.T, columns=['one', 'two', 'three', 'four', 'five'])\n",
    "d4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>five</th>\n",
       "      <th>one ret</th>\n",
       "      <th>two ret</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.7</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>4.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>7.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>16.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>56.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>65.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>72.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>88.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four  five  one ret  two ret\n",
       "0  3.0  8.0    3.0   1.0   nan      nan      nan\n",
       "1  5.0 16.0    6.0   2.0   nan      1.7      2.0\n",
       "2  7.0 28.0    nan   4.0   nan      1.4      1.8\n",
       "3 10.0 37.0    nan   7.0   nan      1.4      1.3\n",
       "4 13.0 45.0   15.0  11.0  16.0      1.3      1.2\n",
       "5 16.0 57.0   18.0  16.0  19.0      1.2      1.3\n",
       "6 56.0 69.0    nan  65.0  82.0      3.5      1.2\n",
       "7 72.0 90.0    nan  88.0  91.0      1.3      1.3"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d4[['one ret','two ret']] = d4[['one','two']].pct_change()+1\n",
    "d4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>five</th>\n",
       "      <th>one ret</th>\n",
       "      <th>two ret</th>\n",
       "      <th>four var</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>4.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>7.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>16.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>56.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>65.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.2</td>\n",
       "      <td>3.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>72.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>88.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>3.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four  five  one ret  two ret  four var\n",
       "0  3.0  8.0    3.0   1.0   nan      nan      nan       nan\n",
       "1  5.0 16.0    6.0   2.0   nan      1.7      2.0       0.0\n",
       "2  7.0 28.0    nan   4.0   nan      1.4      1.8       0.7\n",
       "3 10.0 37.0    nan   7.0   nan      1.4      1.3       1.1\n",
       "4 13.0 45.0   15.0  11.0  16.0      1.3      1.2       1.4\n",
       "5 16.0 57.0   18.0  16.0  19.0      1.2      1.3       1.6\n",
       "6 56.0 69.0    nan  65.0  82.0      3.5      1.2       3.9\n",
       "7 72.0 90.0    nan  88.0  91.0      1.3      1.3       3.1"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d4['four var'] = np.log(d4['four'] - d4['four'].shift())\n",
    "d4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8 Reindex"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`DataFrame.reindex` method conforms a `DataFrame` to a new index, filling cells with no values. It is possible to use this method to rearrange columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>one ret</th>\n",
       "      <th>two</th>\n",
       "      <th>two ret</th>\n",
       "      <th>four</th>\n",
       "      <th>four var</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>8.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.0</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>1.7</td>\n",
       "      <td>16.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>28.0</td>\n",
       "      <td>1.8</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>37.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>45.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>16.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>57.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>56.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>69.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>65.0</td>\n",
       "      <td>3.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>72.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>90.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>88.0</td>\n",
       "      <td>3.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  one ret  two  two ret  four  four var\n",
       "0  3.0      nan  8.0      nan   1.0       nan\n",
       "1  5.0      1.7 16.0      2.0   2.0       0.0\n",
       "2  7.0      1.4 28.0      1.8   4.0       0.7\n",
       "3 10.0      1.4 37.0      1.3   7.0       1.1\n",
       "4 13.0      1.3 45.0      1.2  11.0       1.4\n",
       "5 16.0      1.2 57.0      1.3  16.0       1.6\n",
       "6 56.0      3.5 69.0      1.2  65.0       3.9\n",
       "7 72.0      1.3 90.0      1.3  88.0       3.1"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d5 = d4.reindex(columns=['one','one ret','two','two ret','four','four var'])\n",
    "d5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Notice that `DataFrame.reindex` gives a new view, hence `d4` isn't changed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>one</th>\n",
       "      <th>two</th>\n",
       "      <th>three</th>\n",
       "      <th>four</th>\n",
       "      <th>five</th>\n",
       "      <th>one ret</th>\n",
       "      <th>two ret</th>\n",
       "      <th>four var</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "      <td>nan</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.7</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>4.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.8</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>10.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>7.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>16.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>1.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>56.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>65.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.2</td>\n",
       "      <td>3.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>72.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>nan</td>\n",
       "      <td>88.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>3.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   one  two  three  four  five  one ret  two ret  four var\n",
       "0  3.0  8.0    3.0   1.0   nan      nan      nan       nan\n",
       "1  5.0 16.0    6.0   2.0   nan      1.7      2.0       0.0\n",
       "2  7.0 28.0    nan   4.0   nan      1.4      1.8       0.7\n",
       "3 10.0 37.0    nan   7.0   nan      1.4      1.3       1.1\n",
       "4 13.0 45.0   15.0  11.0  16.0      1.3      1.2       1.4\n",
       "5 16.0 57.0   18.0  16.0  19.0      1.2      1.3       1.6\n",
       "6 56.0 69.0    nan  65.0  82.0      3.5      1.2       3.9\n",
       "7 72.0 90.0    nan  88.0  91.0      1.3      1.3       3.1"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9 More on Indexing: Multi Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Country</th>\n",
       "      <th>Number</th>\n",
       "      <th>Dir</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">Fra</th>\n",
       "      <th>one</th>\n",
       "      <th>x</th>\n",
       "      <td>-0.1</td>\n",
       "      <td>0.3</td>\n",
       "      <td>-0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">two</th>\n",
       "      <th>y</th>\n",
       "      <td>0.3</td>\n",
       "      <td>-0.9</td>\n",
       "      <td>0.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>z</th>\n",
       "      <td>1.8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ger</th>\n",
       "      <th>one</th>\n",
       "      <th>x</th>\n",
       "      <td>-0.7</td>\n",
       "      <td>-0.5</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">Jap</th>\n",
       "      <th>one</th>\n",
       "      <th>x</th>\n",
       "      <td>1.2</td>\n",
       "      <td>1.2</td>\n",
       "      <td>-0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>two</th>\n",
       "      <th>x</th>\n",
       "      <td>-0.4</td>\n",
       "      <td>0.5</td>\n",
       "      <td>-0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">USA</th>\n",
       "      <th rowspan=\"2\" valign=\"top\">one</th>\n",
       "      <th>y</th>\n",
       "      <td>-1.9</td>\n",
       "      <td>-0.9</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>z</th>\n",
       "      <td>-0.1</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      0    1    2\n",
       "Country Number Dir               \n",
       "Fra     one    x   -0.1  0.3 -0.2\n",
       "        two    y    0.3 -0.9  0.3\n",
       "               z    1.8  1.0 -1.4\n",
       "Ger     one    x   -0.7 -0.5  0.4\n",
       "Jap     one    x    1.2  1.2 -0.2\n",
       "        two    x   -0.4  0.5 -0.6\n",
       "USA     one    y   -1.9 -0.9  0.5\n",
       "               z   -0.1 -1.0  0.7"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d6 = pd.read_csv('temp/p01_d4.csv', index_col=['Country',\n",
    "                                               'Number',\n",
    "                                               'Dir'])\n",
    "d6 = d6.sortlevel()\n",
    "d6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Try by yourself:\n",
    "\n",
    "    d6.ix['Fra']\n",
    "    d6.ix['Fra', 'two']\n",
    "    d6.ix['Fra':'Ger']\n",
    "    d6.reorder_levels([2,1,0], axis=0).sortlevel(0)\n",
    "    d6.reset_index(level=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Number</th>\n",
       "      <th>Dir</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <th>x</th>\n",
       "      <td>-0.1</td>\n",
       "      <td>0.3</td>\n",
       "      <td>-0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">two</th>\n",
       "      <th>y</th>\n",
       "      <td>0.3</td>\n",
       "      <td>-0.9</td>\n",
       "      <td>0.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>z</th>\n",
       "      <td>1.8</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              0    1    2\n",
       "Number Dir               \n",
       "one    x   -0.1  0.3 -0.2\n",
       "two    y    0.3 -0.9  0.3\n",
       "       z    1.8  1.0 -1.4"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d6.ix['Fra'] "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10 Package Options"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The way the DataFrames are displayed can be customized ijn many ways: ([See documentation](http://pandas.pydata.org/pandas-docs/stable/basics.html#working-with-package-options)):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Display Max Rows: \t 60\n",
      "display.max_rows : int\n",
      "    If max_rows is exceeded, switch to truncate view. Depending on\n",
      "    `large_repr`, objects are either centrally truncated or printed as\n",
      "    a summary view. 'None' value means unlimited.\n",
      "\n",
      "    In case python/IPython is running in a terminal and `large_repr`\n",
      "    equals 'truncate' this can be set to 0 and pandas will auto-detect\n",
      "    the height of the terminal and print a truncated object which fits\n",
      "    the screen height. The IPython notebook, IPython qtconsole, or\n",
      "    IDLE do not run in a terminal and hence it is not possible to do\n",
      "    correct auto-detection.\n",
      "    [default: 60] [currently: 60]\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print ('Display Max Rows: \\t',  pd.get_option('display.max_rows'))\n",
    "pd.describe_option('display.max_rows')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.max_rows', 10)\n",
    "pd.get_option('display.max_rows')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "60"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.reset_option('display.max_rows')\n",
    "pd.get_option('display.max_rows')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Try by yourself**:\n",
    "\n",
    "    pd.describe_option('display.chop_threshold')\n",
    "    pd.describe_option('display.colheader_justify')\n",
    "    pd.describe_option('display.column_space')\n",
    "    pd.describe_option('display.date_dayfirst')\n",
    "    pd.describe_option('display.date_yearfirst')\n",
    "    pd.describe_option('display.encoding')\n",
    "    pd.describe_option('display.expand_frame_repr')\n",
    "    pd.describe_option('display.float_format')\n",
    "    pd.describe_option('display.max_columns')\n",
    "    pd.describe_option('display.max_colwidth')\n",
    "    pd.describe_option('display.max_rows')\n",
    "    pd.describe_option('display.notebook_repr_html')\n",
    "    pd.describe_option('display.precision')\n",
    "    # for more options see documentation..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            AAPL  GOOG  JNJ  XOM\n",
       "Date                            \n",
       "2015-09-21 115.2 635.4 93.1 73.4\n",
       "2015-09-22 113.4 622.7 93.2 72.7\n",
       "2015-09-23 114.3 622.4 93.0 72.3\n",
       "2015-09-24 115.0 625.8 92.5 72.7\n",
       "2015-09-25 114.7 612.0 91.0 73.2\n",
       "2015-09-28 112.4 594.9 91.4 72.6"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.precision', 2)\n",
    "pd.set_option('display.notebook_repr_html', False)\n",
    "prices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>AAPL</th>\n",
       "      <th>GOOG</th>\n",
       "      <th>JNJ</th>\n",
       "      <th>XOM</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-09-21</th>\n",
       "      <td>115.2</td>\n",
       "      <td>635.4</td>\n",
       "      <td>93.1</td>\n",
       "      <td>73.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-22</th>\n",
       "      <td>113.4</td>\n",
       "      <td>622.7</td>\n",
       "      <td>93.2</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-23</th>\n",
       "      <td>114.3</td>\n",
       "      <td>622.4</td>\n",
       "      <td>93.0</td>\n",
       "      <td>72.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-24</th>\n",
       "      <td>115.0</td>\n",
       "      <td>625.8</td>\n",
       "      <td>92.5</td>\n",
       "      <td>72.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-25</th>\n",
       "      <td>114.7</td>\n",
       "      <td>612.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>73.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-09-28</th>\n",
       "      <td>112.4</td>\n",
       "      <td>594.9</td>\n",
       "      <td>91.4</td>\n",
       "      <td>72.6</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            AAPL  GOOG  JNJ  XOM\n",
       "Date                            \n",
       "2015-09-21 115.2 635.4 93.1 73.4\n",
       "2015-09-22 113.4 622.7 93.2 72.7\n",
       "2015-09-23 114.3 622.4 93.0 72.3\n",
       "2015-09-24 115.0 625.8 92.5 72.7\n",
       "2015-09-25 114.7 612.0 91.0 73.2\n",
       "2015-09-28 112.4 594.9 91.4 72.6"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.set_option('display.notebook_repr_html', True)\n",
    "prices"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "Visit [www.add-for.com](<http://www.add-for.com/IT>) for more tutorials and updates.\n",
    "\n",
    "This work is licensed under a <a rel=\"license\" href=\"http://creativecommons.org/licenses/by-sa/4.0/\">Creative Commons Attribution-ShareAlike 4.0 International License</a>."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}