{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# pandas I/O tools and examples"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
"
],
"text/plain": [
""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import addutils.toc ; addutils.toc.js(ipy_notebook=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from addutils import css_notebook\n",
"css_notebook()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1 Matlab Variables"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1.1 Import a Matlab variable from file"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"import os \n",
"import scipy.io\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sqlite3\n",
"import pandas.io.sql as psql\n",
"from time import time\n",
"from IPython.display import display\n",
"from addutils import css_notebook\n",
"css_notebook()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import sys"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import from '.mat' files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"x = scipy.io.loadmat('example_data/matlab_variable.mat')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 92, 99, 1, 8, 15, 67, 74, 51, 58, 40],\n",
" [ 98, 80, 7, 14, 16, 73, 55, 57, 64, 41],\n",
" [ 4, 81, 88, 20, 22, 54, 56, 63, 70, 47],\n",
" [ 85, 87, 19, 21, 3, 60, 62, 69, 71, 28],\n",
" [ 86, 93, 25, 2, 9, 61, 68, 75, 52, 34],\n",
" [ 17, 24, 76, 83, 90, 42, 49, 26, 33, 65],\n",
" [ 23, 5, 82, 89, 91, 48, 30, 32, 39, 66],\n",
" [ 79, 6, 13, 95, 97, 29, 31, 38, 45, 72],\n",
" [ 10, 12, 94, 96, 78, 35, 37, 44, 46, 53],\n",
" [ 11, 18, 100, 77, 84, 36, 43, 50, 27, 59]], dtype=uint8)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pyA = x['a']\n",
"pyA"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Matlab variable is passed to a pandas DataFrame:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 92 | \n",
" 99 | \n",
" 1 | \n",
" 8 | \n",
" 15 | \n",
" 67 | \n",
" 74 | \n",
" 51 | \n",
" 58 | \n",
" 40 | \n",
"
\n",
" \n",
" 1 | \n",
" 98 | \n",
" 80 | \n",
" 7 | \n",
" 14 | \n",
" 16 | \n",
" 73 | \n",
" 55 | \n",
" 57 | \n",
" 64 | \n",
" 41 | \n",
"
\n",
" \n",
" 2 | \n",
" 4 | \n",
" 81 | \n",
" 88 | \n",
" 20 | \n",
" 22 | \n",
" 54 | \n",
" 56 | \n",
" 63 | \n",
" 70 | \n",
" 47 | \n",
"
\n",
" \n",
" 3 | \n",
" 85 | \n",
" 87 | \n",
" 19 | \n",
" 21 | \n",
" 3 | \n",
" 60 | \n",
" 62 | \n",
" 69 | \n",
" 71 | \n",
" 28 | \n",
"
\n",
" \n",
" 4 | \n",
" 86 | \n",
" 93 | \n",
" 25 | \n",
" 2 | \n",
" 9 | \n",
" 61 | \n",
" 68 | \n",
" 75 | \n",
" 52 | \n",
" 34 | \n",
"
\n",
" \n",
" 5 | \n",
" 17 | \n",
" 24 | \n",
" 76 | \n",
" 83 | \n",
" 90 | \n",
" 42 | \n",
" 49 | \n",
" 26 | \n",
" 33 | \n",
" 65 | \n",
"
\n",
" \n",
" 6 | \n",
" 23 | \n",
" 5 | \n",
" 82 | \n",
" 89 | \n",
" 91 | \n",
" 48 | \n",
" 30 | \n",
" 32 | \n",
" 39 | \n",
" 66 | \n",
"
\n",
" \n",
" 7 | \n",
" 79 | \n",
" 6 | \n",
" 13 | \n",
" 95 | \n",
" 97 | \n",
" 29 | \n",
" 31 | \n",
" 38 | \n",
" 45 | \n",
" 72 | \n",
"
\n",
" \n",
" 8 | \n",
" 10 | \n",
" 12 | \n",
" 94 | \n",
" 96 | \n",
" 78 | \n",
" 35 | \n",
" 37 | \n",
" 44 | \n",
" 46 | \n",
" 53 | \n",
"
\n",
" \n",
" 9 | \n",
" 11 | \n",
" 18 | \n",
" 100 | \n",
" 77 | \n",
" 84 | \n",
" 36 | \n",
" 43 | \n",
" 50 | \n",
" 27 | \n",
" 59 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9\n",
"0 92 99 1 8 15 67 74 51 58 40\n",
"1 98 80 7 14 16 73 55 57 64 41\n",
"2 4 81 88 20 22 54 56 63 70 47\n",
"3 85 87 19 21 3 60 62 69 71 28\n",
"4 86 93 25 2 9 61 68 75 52 34\n",
"5 17 24 76 83 90 42 49 26 33 65\n",
"6 23 5 82 89 91 48 30 32 39 66\n",
"7 79 6 13 95 97 29 31 38 45 72\n",
"8 10 12 94 96 78 35 37 44 46 53\n",
"9 11 18 100 77 84 36 43 50 27 59"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(pyA)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2 Importing a compressed CSV"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following example shows how to import directly a compressed csv file, in this case with multiple separators:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Names | \n",
" Phone | \n",
" Town | \n",
" Description | \n",
" Income | \n",
" Coordinates | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Lydia Z. Flores | \n",
" (02) 0548 5995 | \n",
" Haddington | \n",
" sed, sapien. Nunc pulvinar arcu | \n",
" â¬6,223 | \n",
" 23.28054, -24.48755 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Bell X. Guerra | \n",
" (07) 8599 9926 | \n",
" Montes Claros | \n",
" consectetuer, cursus et, magna. Praesent | \n",
" â¬14,967 | \n",
" -77.76329, 69.22339 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Kirk Q. Bowman | \n",
" (06) 4153 7501 | \n",
" Cagli | \n",
" tortor. Nunc commodo auctor velit. | \n",
" â¬17,399 | \n",
" -58.80037, 22.50537 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Karen Pickett | \n",
" (02) 3216 9708 | \n",
" Cobourg | \n",
" at auctor ullamcorper, nisl arcu | \n",
" â¬17,373 | \n",
" 75.73982, -78.01872 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Vance J. Johnson | \n",
" (01) 7568 6371 | \n",
" Carlton | \n",
" ultricies adipiscing, enim mi tempor | \n",
" â¬9,025 | \n",
" -86.51337, 109.46298 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Names Phone Town \\\n",
"0 1 Lydia Z. Flores (02) 0548 5995 Haddington \n",
"1 2 Bell X. Guerra (07) 8599 9926 Montes Claros \n",
"2 3 Kirk Q. Bowman (06) 4153 7501 Cagli \n",
"3 4 Karen Pickett (02) 3216 9708 Cobourg \n",
"4 5 Vance J. Johnson (01) 7568 6371 Carlton \n",
"\n",
" Description Income \\\n",
"0 sed, sapien. Nunc pulvinar arcu â¬6,223 \n",
"1 consectetuer, cursus et, magna. Praesent â¬14,967 \n",
"2 tortor. Nunc commodo auctor velit. â¬17,399 \n",
"3 at auctor ullamcorper, nisl arcu â¬17,373 \n",
"4 ultricies adipiscing, enim mi tempor â¬9,025 \n",
"\n",
" Coordinates \n",
"0 23.28054, -24.48755 \n",
"1 -77.76329, 69.22339 \n",
"2 -58.80037, 22.50537 \n",
"3 75.73982, -78.01872 \n",
"4 -86.51337, 109.46298 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_csv = pd.read_csv('example_data/pd12_peopl.csv.gz', sep=r'\\,\\;\\.', \n",
" skipinitialspace=True, compression='gzip', engine='python',\n",
" encoding='iso8859_15')\n",
"df_csv.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**TODO**: utf-8 has problems decoding euro sign. \"\\u00e2\\u0082\\u00ac\" are the three chars you get when the UTF-8 encoded \\u20ac (EURO SIGN) is mistakenly interpreted as ISO-8859-1."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Names | \n",
" Phone | \n",
" Town | \n",
" Description | \n",
" Income | \n",
" Coordinates | \n",
" lat | \n",
" lon | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Lydia Z. Flores | \n",
" (02) 0548 5995 | \n",
" Haddington | \n",
" sed, sapien. Nunc pulvinar arcu | \n",
" â¬6,223 | \n",
" 23.28054, -24.48755 | \n",
" 23.28054 | \n",
" -24.48755 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Bell X. Guerra | \n",
" (07) 8599 9926 | \n",
" Montes Claros | \n",
" consectetuer, cursus et, magna. Praesent | \n",
" â¬14,967 | \n",
" -77.76329, 69.22339 | \n",
" -77.76329 | \n",
" 69.22339 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Kirk Q. Bowman | \n",
" (06) 4153 7501 | \n",
" Cagli | \n",
" tortor. Nunc commodo auctor velit. | \n",
" â¬17,399 | \n",
" -58.80037, 22.50537 | \n",
" -58.80037 | \n",
" 22.50537 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" Karen Pickett | \n",
" (02) 3216 9708 | \n",
" Cobourg | \n",
" at auctor ullamcorper, nisl arcu | \n",
" â¬17,373 | \n",
" 75.73982, -78.01872 | \n",
" 75.73982 | \n",
" -78.01872 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" Vance J. Johnson | \n",
" (01) 7568 6371 | \n",
" Carlton | \n",
" ultricies adipiscing, enim mi tempor | \n",
" â¬9,025 | \n",
" -86.51337, 109.46298 | \n",
" -86.51337 | \n",
" 109.46298 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Names Phone Town \\\n",
"0 1 Lydia Z. Flores (02) 0548 5995 Haddington \n",
"1 2 Bell X. Guerra (07) 8599 9926 Montes Claros \n",
"2 3 Kirk Q. Bowman (06) 4153 7501 Cagli \n",
"3 4 Karen Pickett (02) 3216 9708 Cobourg \n",
"4 5 Vance J. Johnson (01) 7568 6371 Carlton \n",
"\n",
" Description Income \\\n",
"0 sed, sapien. Nunc pulvinar arcu â¬6,223 \n",
"1 consectetuer, cursus et, magna. Praesent â¬14,967 \n",
"2 tortor. Nunc commodo auctor velit. â¬17,399 \n",
"3 at auctor ullamcorper, nisl arcu â¬17,373 \n",
"4 ultricies adipiscing, enim mi tempor â¬9,025 \n",
"\n",
" Coordinates lat lon \n",
"0 23.28054, -24.48755 23.28054 -24.48755 \n",
"1 -77.76329, 69.22339 -77.76329 69.22339 \n",
"2 -58.80037, 22.50537 -58.80037 22.50537 \n",
"3 75.73982, -78.01872 75.73982 -78.01872 \n",
"4 -86.51337, 109.46298 -86.51337 109.46298 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Split 'Coordinates' into 'lat'/'lon' with one vectorized string operation\n",
"# instead of a Python loop of row-wise .loc assignments.\n",
"# n=1 splits on the first comma only, matching the original spl[0]/spl[1]\n",
"# behavior (lon keeps its leading space, values stay strings).\n",
"df_csv[['lat', 'lon']] = df_csv['Coordinates'].astype(str).str.split(',', n=1, expand=True)\n",
"df_csv.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df_csv.to_json('temp/converted_json.json')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3 Importing JSON files"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Coordinates | \n",
" Description | \n",
" ID | \n",
" Income | \n",
" Names | \n",
" Phone | \n",
" Town | \n",
" lat | \n",
" lon | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 23.28054, -24.48755 | \n",
" sed, sapien. Nunc pulvinar arcu | \n",
" 1 | \n",
" â¬6,223 | \n",
" Lydia Z. Flores | \n",
" (02) 0548 5995 | \n",
" Haddington | \n",
" 23.28054 | \n",
" -24.48755 | \n",
"
\n",
" \n",
" 1 | \n",
" -77.76329, 69.22339 | \n",
" consectetuer, cursus et, magna. Praesent | \n",
" 2 | \n",
" â¬14,967 | \n",
" Bell X. Guerra | \n",
" (07) 8599 9926 | \n",
" Montes Claros | \n",
" -77.76329 | \n",
" 69.22339 | \n",
"
\n",
" \n",
" 10 | \n",
" -62.95199, 116.29751 | \n",
" Duis sit amet diam eu | \n",
" 11 | \n",
" â¬15,764 | \n",
" Jameson H. Craig | \n",
" (07) 8640 5274 | \n",
" Fairbanks | \n",
" -62.95199 | \n",
" 116.29751 | \n",
"
\n",
" \n",
" 11 | \n",
" -42.26793, -92.23605 | \n",
" et netus et malesuada fames | \n",
" 12 | \n",
" â¬18,460 | \n",
" Michael Randall | \n",
" (04) 3451 8606 | \n",
" Mezzana | \n",
" -42.26793 | \n",
" -92.23605 | \n",
"
\n",
" \n",
" 12 | \n",
" -20.39897, 152.4122 | \n",
" id risus quis diam luctus | \n",
" 13 | \n",
" â¬6,175 | \n",
" Sierra Rivers | \n",
" (01) 2020 4511 | \n",
" Lugnano in Teverina | \n",
" -20.39897 | \n",
" 152.41220 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Coordinates Description ID \\\n",
"0 23.28054, -24.48755 sed, sapien. Nunc pulvinar arcu 1 \n",
"1 -77.76329, 69.22339 consectetuer, cursus et, magna. Praesent 2 \n",
"10 -62.95199, 116.29751 Duis sit amet diam eu 11 \n",
"11 -42.26793, -92.23605 et netus et malesuada fames 12 \n",
"12 -20.39897, 152.4122 id risus quis diam luctus 13 \n",
"\n",
" Income Names Phone \\\n",
"0 â¬6,223 Lydia Z. Flores (02) 0548 5995 \n",
"1 â¬14,967 Bell X. Guerra (07) 8599 9926 \n",
"10 â¬15,764 Jameson H. Craig (07) 8640 5274 \n",
"11 â¬18,460 Michael Randall (04) 3451 8606 \n",
"12 â¬6,175 Sierra Rivers (01) 2020 4511 \n",
"\n",
" Town lat lon \n",
"0 Haddington 23.28054 -24.48755 \n",
"1 Montes Claros -77.76329 69.22339 \n",
"10 Fairbanks -62.95199 116.29751 \n",
"11 Mezzana -42.26793 -92.23605 \n",
"12 Lugnano in Teverina -20.39897 152.41220 "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pd.read_json accepts a file path directly; manually reading the file and\n",
"# encoding the text back to bytes is unnecessary and risks double-encoding\n",
"# non-ASCII characters.\n",
"dfjson = pd.read_json('temp/converted_json.json')\n",
"dfjson.head()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since the import reordered columns in alphabetical order, we can choose a preferred column order:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" Names | \n",
" Phone | \n",
" Income | \n",
" Town | \n",
" lat | \n",
" lon | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Lydia Z. Flores | \n",
" (02) 0548 5995 | \n",
" â¬6,223 | \n",
" Haddington | \n",
" 23.28054 | \n",
" -24.48755 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Bell X. Guerra | \n",
" (07) 8599 9926 | \n",
" â¬14,967 | \n",
" Montes Claros | \n",
" -77.76329 | \n",
" 69.22339 | \n",
"
\n",
" \n",
" 10 | \n",
" 11 | \n",
" Jameson H. Craig | \n",
" (07) 8640 5274 | \n",
" â¬15,764 | \n",
" Fairbanks | \n",
" -62.95199 | \n",
" 116.29751 | \n",
"
\n",
" \n",
" 11 | \n",
" 12 | \n",
" Michael Randall | \n",
" (04) 3451 8606 | \n",
" â¬18,460 | \n",
" Mezzana | \n",
" -42.26793 | \n",
" -92.23605 | \n",
"
\n",
" \n",
" 12 | \n",
" 13 | \n",
" Sierra Rivers | \n",
" (01) 2020 4511 | \n",
" â¬6,175 | \n",
" Lugnano in Teverina | \n",
" -20.39897 | \n",
" 152.41220 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID Names Phone Income \\\n",
"0 1 Lydia Z. Flores (02) 0548 5995 â¬6,223 \n",
"1 2 Bell X. Guerra (07) 8599 9926 â¬14,967 \n",
"10 11 Jameson H. Craig (07) 8640 5274 â¬15,764 \n",
"11 12 Michael Randall (04) 3451 8606 â¬18,460 \n",
"12 13 Sierra Rivers (01) 2020 4511 â¬6,175 \n",
"\n",
" Town lat lon \n",
"0 Haddington 23.28054 -24.48755 \n",
"1 Montes Claros -77.76329 69.22339 \n",
"10 Fairbanks -62.95199 116.29751 \n",
"11 Mezzana -42.26793 -92.23605 \n",
"12 Lugnano in Teverina -20.39897 152.41220 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfjson = dfjson.loc[:, ['ID', 'Names', 'Phone', 'Income', 'Town', 'lat', 'lon']]\n",
"dfjson.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4 Importing HTML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `read_html` returns a **list** of DataFrame objects, even if there is only a single table contained in the `HTML` content. The `infer_types` argument (since removed from pandas) avoided automatic detection of numeric and date types, which used to generate an error with the coordinate columns."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Nomi maschili | \n",
" Strade | \n",
" Città | \n",
" Coordinate | \n",
" Ente | \n",
" mail | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Drew | \n",
" 5672 Accumsan Road | \n",
" Whitburn | \n",
" -54.68429, -67.21709 | \n",
" Ac Consulting | \n",
" a.aliquet.vel@vitae.co.uk | \n",
"
\n",
" \n",
" 1 | \n",
" Finn | \n",
" Ap #170-4074 Interdum Rd. | \n",
" Pelotas | \n",
" 81.93087, 168.14556 | \n",
" Varius Incorporated | \n",
" nec.malesuada.ut@primisin.com | \n",
"
\n",
" \n",
" 2 | \n",
" Martin | \n",
" Ap #471-6260 Etiam Ave | \n",
" Aliano | \n",
" 33.99272, -148.70584 | \n",
" Arcu Industries | \n",
" lacus@risus.net | \n",
"
\n",
" \n",
" 3 | \n",
" Zephania | \n",
" Ap #720-7951 Aliquam Ave | \n",
" Senneville | \n",
" -56.82569, -72.80435 | \n",
" Tristique PC | \n",
" arcu@sempereratin.co.uk | \n",
"
\n",
" \n",
" 4 | \n",
" Devin | \n",
" 3538 Dui St. | \n",
" Zwettl-Niederösterreich | \n",
" -11.90874, -149.61123 | \n",
" Imperdiet Dictum PC | \n",
" lectus.rutrum.urna@massaQuisqueporttitor.ca | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Nomi maschili Strade Città \\\n",
"0 Drew 5672 Accumsan Road Whitburn \n",
"1 Finn Ap #170-4074 Interdum Rd. Pelotas \n",
"2 Martin Ap #471-6260 Etiam Ave Aliano \n",
"3 Zephania Ap #720-7951 Aliquam Ave Senneville \n",
"4 Devin 3538 Dui St. Zwettl-Niederösterreich \n",
"\n",
" Coordinate Ente \\\n",
"0 -54.68429, -67.21709 Ac Consulting \n",
"1 81.93087, 168.14556 Varius Incorporated \n",
"2 33.99272, -148.70584 Arcu Industries \n",
"3 -56.82569, -72.80435 Tristique PC \n",
"4 -11.90874, -149.61123 Imperdiet Dictum PC \n",
"\n",
" mail \n",
"0 a.aliquet.vel@vitae.co.uk \n",
"1 nec.malesuada.ut@primisin.com \n",
"2 lacus@risus.net \n",
"3 arcu@sempereratin.co.uk \n",
"4 lectus.rutrum.urna@massaQuisqueporttitor.ca "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfhtml = pd.read_html('example_data/generated.html', header=0)\n",
"dfhtml[0].head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 5 Importing Excel"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Names | \n",
" Streets | \n",
" Town | \n",
" Coordinates | \n",
" Corporation | \n",
" mail | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Stephen | \n",
" Ap #389-3365 Risus, St. | \n",
" Lelystad | \n",
" 51.91783, -47.01037 | \n",
" Consectetuer Rhoncus Nullam Corporation | \n",
" Suspendisse.sed@pedePraesenteu.org | \n",
"
\n",
" \n",
" 1 | \n",
" Kenneth | \n",
" 120-6483 Ligula. Ave | \n",
" Labrecque | \n",
" -1.92625, 10.02451 | \n",
" A Corporation | \n",
" orci@egetmollislectus.com | \n",
"
\n",
" \n",
" 2 | \n",
" Leo | \n",
" Ap #727-2085 Eget Av. | \n",
" Strathcona County | \n",
" -17.31839, 137.99307 | \n",
" Sed Limited | \n",
" Etiam@vel.org | \n",
"
\n",
" \n",
" 3 | \n",
" Joshua | \n",
" P.O. Box 425, 6462 Arcu Rd. | \n",
" Municipal District | \n",
" -51.34642, 80.32145 | \n",
" Mauris Sapien Cursus Corp. | \n",
" lorem@ornarelectusante.net | \n",
"
\n",
" \n",
" 4 | \n",
" Zephania | \n",
" 655 Et, St. | \n",
" Couillet | \n",
" 29.96525, 124.18391 | \n",
" Odio Semper Cursus Corp. | \n",
" metus@a.ca | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Names Streets Town \\\n",
"0 Stephen Ap #389-3365 Risus, St. Lelystad \n",
"1 Kenneth 120-6483 Ligula. Ave Labrecque \n",
"2 Leo Ap #727-2085 Eget Av. Strathcona County \n",
"3 Joshua P.O. Box 425, 6462 Arcu Rd. Municipal District \n",
"4 Zephania 655 Et, St. Couillet \n",
"\n",
" Coordinates Corporation \\\n",
"0 51.91783, -47.01037 Consectetuer Rhoncus Nullam Corporation \n",
"1 -1.92625, 10.02451 A Corporation \n",
"2 -17.31839, 137.99307 Sed Limited \n",
"3 -51.34642, 80.32145 Mauris Sapien Cursus Corp. \n",
"4 29.96525, 124.18391 Odio Semper Cursus Corp. \n",
"\n",
" mail \n",
"0 Suspendisse.sed@pedePraesenteu.org \n",
"1 orci@egetmollislectus.com \n",
"2 Etiam@vel.org \n",
"3 lorem@ornarelectusante.net \n",
"4 metus@a.ca "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dfxl = pd.read_excel('example_data/generated2.xls', 'foglio')\n",
"dfxl.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6 Working with SQL and databases"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.1 Write SQL"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's store the DataFrame opened from excel in a database. We use SQLite, a database engine library suitable for storing data in a single-file database. 'Names' is the name we chose for the database table we are creating:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"con = sqlite3.connect(\"temp.sql\")\n",
"sqlfile = dfxl.to_sql('Names', con)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 6.2 Import SQL"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(100, 7)\n"
]
}
],
"source": [
"con = sqlite3.connect('temp.sql')\n",
"# NOTE: sqlite3's connection context manager scopes a transaction; it does\n",
"# NOT close the connection (that is done explicitly in a later cell).\n",
"with con:\n",
"    sql = \"SELECT * FROM Names;\"\n",
"    # Use the public pd.read_sql API rather than the internal pandas.io.sql\n",
"    # module imported as psql.\n",
"    df = pd.read_sql(sql, con)\n",
"    print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" index | \n",
" Names | \n",
" Streets | \n",
" Town | \n",
" Coordinates | \n",
" Corporation | \n",
" mail | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" Stephen | \n",
" Ap #389-3365 Risus, St. | \n",
" Lelystad | \n",
" 51.91783, -47.01037 | \n",
" Consectetuer Rhoncus Nullam Corporation | \n",
" Suspendisse.sed@pedePraesenteu.org | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" Kenneth | \n",
" 120-6483 Ligula. Ave | \n",
" Labrecque | \n",
" -1.92625, 10.02451 | \n",
" A Corporation | \n",
" orci@egetmollislectus.com | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" Leo | \n",
" Ap #727-2085 Eget Av. | \n",
" Strathcona County | \n",
" -17.31839, 137.99307 | \n",
" Sed Limited | \n",
" Etiam@vel.org | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Joshua | \n",
" P.O. Box 425, 6462 Arcu Rd. | \n",
" Municipal District | \n",
" -51.34642, 80.32145 | \n",
" Mauris Sapien Cursus Corp. | \n",
" lorem@ornarelectusante.net | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Zephania | \n",
" 655 Et, St. | \n",
" Couillet | \n",
" 29.96525, 124.18391 | \n",
" Odio Semper Cursus Corp. | \n",
" metus@a.ca | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index Names Streets Town \\\n",
"0 0 Stephen Ap #389-3365 Risus, St. Lelystad \n",
"1 1 Kenneth 120-6483 Ligula. Ave Labrecque \n",
"2 2 Leo Ap #727-2085 Eget Av. Strathcona County \n",
"3 3 Joshua P.O. Box 425, 6462 Arcu Rd. Municipal District \n",
"4 4 Zephania 655 Et, St. Couillet \n",
"\n",
" Coordinates Corporation \\\n",
"0 51.91783, -47.01037 Consectetuer Rhoncus Nullam Corporation \n",
"1 -1.92625, 10.02451 A Corporation \n",
"2 -17.31839, 137.99307 Sed Limited \n",
"3 -51.34642, 80.32145 Mauris Sapien Cursus Corp. \n",
"4 29.96525, 124.18391 Odio Semper Cursus Corp. \n",
"\n",
" mail \n",
"0 Suspendisse.sed@pedePraesenteu.org \n",
"1 orci@egetmollislectus.com \n",
"2 Etiam@vel.org \n",
"3 lorem@ornarelectusante.net \n",
"4 metus@a.ca "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"con.close()\n",
"os.remove(\"temp.sql\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 7 Working with HDF5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7.1 Storer format"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**HDFStore** is a dict-like object used by pandas to store datasets as **HDF5** files using the **PyTables** library. **HDF5** is a scientific hierarchical data format suitable for storing very large, multi-dimensional data arrays in a single file. The **Storer** (fixed) format stores fixed arrays, which are not queryable and must be retrieved in their entirety."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add DataFrames to the HDFStore object:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"samples_01 = int(3e5)\n",
"samples_02 = int(1e5)\n",
"idx1 = pd.date_range('1/1/2000 12:00:00', periods=samples_01, freq='50ms', tz='Europe/Rome')\n",
"idx2 = pd.date_range('1/1/2000 12:05:00', periods=samples_02, freq='100ms', tz='Europe/Rome')\n",
"randn = np.random.randn"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df1 = pd.DataFrame(randn(samples_01, 3), index=idx1, columns=['A', 'B', 'C'])\n",
"df2 = pd.DataFrame(randn(samples_02, 4), index=idx2, columns=['A', 'B', 'C', 'D'])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size of the Dataset: 13.73291015625 MB\n"
]
}
],
"source": [
"# Report the combined in-memory size of both DataFrames in MiB\n",
"# (df1 + df2 — not df1 counted twice).\n",
"print('Size of the Dataset: ', (df1.values.nbytes + df2.values.nbytes)/2**20, ' MB')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"20.4 ms ± 612 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"with pd.HDFStore('temp/store53.h5') as store:\n",
" store.put('storer/df1', df1)\n",
" store.put('storer/df2', df2)\n",
" store.put('to_remove', df2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Retrieve stored objects:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['/', '/to_remove', '/storer/df1', '/storer/df2', '/table/df1_appended']\n",
" A B C\n",
"2000-01-01 12:00:00.050000+01:00 -1.224140 0.058376 2.181400\n",
"2000-01-01 12:00:00.100000+01:00 0.461911 0.018024 -0.165891\n",
" A B C\n",
"2000-01-01 12:00:00.050000+01:00 -1.224140 0.058376 2.181400\n",
"2000-01-01 12:00:00.100000+01:00 0.461911 0.018024 -0.165891\n",
"Check retrieved data equal to original data: \n",
"True\n"
]
}
],
"source": [
"with pd.HDFStore('temp/store53.h5') as store:\n",
" print (store.keys())\n",
" df1_retrieved = store.get('storer/df1')\n",
" print (df1_retrieved[1:3])\n",
" print (df1[1:3])\n",
" print ('Check retrieved data equal to original data: ')\n",
" print (df1_retrieved[1:3].equals(df1[1:3]))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Delete objects:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"File path: temp/store53.h5\n",
"\n"
]
}
],
"source": [
"with pd.HDFStore('temp/store53.h5') as store:\n",
"    try:\n",
"        store.remove('to_remove')\n",
"    except KeyError:\n",
"        # The key may already have been removed in a previous run; any other\n",
"        # error (I/O failure, corrupt store) should propagate, not be hidden\n",
"        # by a bare except.\n",
"        pass\n",
"    print(store)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7.2 Table format"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The table format conceptually is shaped very much like a DataFrame and may be appended to in the same or other sessions. In addition, delete & query type operations are supported."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with pd.HDFStore('temp/store53.h5') as store:\n",
" # store.append creates a table automatically:\n",
" store.append('table/df1_appended', df1.iloc[:10000])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"with pd.HDFStore('temp/store53.h5') as store:\n",
"    # iloc slices are half-open, so each chunk must start exactly where the\n",
"    # previous one ended; the original 10001/20001 starts silently dropped\n",
"    # rows 10000 and 20000 from the appended table.\n",
"    store.append('table/df1_appended', df1.iloc[10000:20000])\n",
"    store.append('table/df1_appended', df1.iloc[20000:50000])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 7.3 Querying a Table"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Query the table using boolean expression with in-line function evaluation:"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" A | \n",
" B | \n",
"
\n",
" \n",
" \n",
" \n",
" 2000-01-01 12:00:00.200000+01:00 | \n",
" -0.516980 | \n",
" 0.231047 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.250000+01:00 | \n",
" 0.151773 | \n",
" 1.263926 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.300000+01:00 | \n",
" -0.221863 | \n",
" 2.628103 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.350000+01:00 | \n",
" 0.454621 | \n",
" -0.378720 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.200000+01:00 | \n",
" -3.104964 | \n",
" -1.301010 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.250000+01:00 | \n",
" 0.883893 | \n",
" 0.319630 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.300000+01:00 | \n",
" 0.780367 | \n",
" -0.921197 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.350000+01:00 | \n",
" 1.117064 | \n",
" -0.678611 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.200000+01:00 | \n",
" 1.091494 | \n",
" -1.163870 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.250000+01:00 | \n",
" -0.131051 | \n",
" -1.156213 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.300000+01:00 | \n",
" 0.773293 | \n",
" -1.646943 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.350000+01:00 | \n",
" -0.293174 | \n",
" 0.268033 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.200000+01:00 | \n",
" 0.209013 | \n",
" 0.178377 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.250000+01:00 | \n",
" 0.714508 | \n",
" -0.081073 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.300000+01:00 | \n",
" -0.115000 | \n",
" -1.940541 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.350000+01:00 | \n",
" -0.317933 | \n",
" 0.245165 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.200000+01:00 | \n",
" 1.088952 | \n",
" -0.470424 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.250000+01:00 | \n",
" -1.921369 | \n",
" -1.620379 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.300000+01:00 | \n",
" 0.799032 | \n",
" 0.974941 | \n",
"
\n",
" \n",
" 2000-01-01 12:00:00.350000+01:00 | \n",
" 0.839663 | \n",
" 0.301037 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" A B\n",
"2000-01-01 12:00:00.200000+01:00 -0.516980 0.231047\n",
"2000-01-01 12:00:00.250000+01:00 0.151773 1.263926\n",
"2000-01-01 12:00:00.300000+01:00 -0.221863 2.628103\n",
"2000-01-01 12:00:00.350000+01:00 0.454621 -0.378720\n",
"2000-01-01 12:00:00.200000+01:00 -3.104964 -1.301010\n",
"2000-01-01 12:00:00.250000+01:00 0.883893 0.319630\n",
"2000-01-01 12:00:00.300000+01:00 0.780367 -0.921197\n",
"2000-01-01 12:00:00.350000+01:00 1.117064 -0.678611\n",
"2000-01-01 12:00:00.200000+01:00 1.091494 -1.163870\n",
"2000-01-01 12:00:00.250000+01:00 -0.131051 -1.156213\n",
"2000-01-01 12:00:00.300000+01:00 0.773293 -1.646943\n",
"2000-01-01 12:00:00.350000+01:00 -0.293174 0.268033\n",
"2000-01-01 12:00:00.200000+01:00 0.209013 0.178377\n",
"2000-01-01 12:00:00.250000+01:00 0.714508 -0.081073\n",
"2000-01-01 12:00:00.300000+01:00 -0.115000 -1.940541\n",
"2000-01-01 12:00:00.350000+01:00 -0.317933 0.245165\n",
"2000-01-01 12:00:00.200000+01:00 1.088952 -0.470424\n",
"2000-01-01 12:00:00.250000+01:00 -1.921369 -1.620379\n",
"2000-01-01 12:00:00.300000+01:00 0.799032 0.974941\n",
"2000-01-01 12:00:00.350000+01:00 0.839663 0.301037"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with pd.HDFStore('temp/store53.h5') as store:\n",
" query01 = store.select('table/df1_appended',\n",
" \"index>=Timestamp('2000-01-01 12:00:00.20+01:00') \\\n",
" & index) for more tutorials and updates.\n",
"\n",
"This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:addfor_tutorials]",
"language": "python",
"name": "conda-env-addfor_tutorials-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 1
}