{
 "metadata": {
  "name": "",
  "signature": "sha256:56d35a8b5a4b1d6d9786cef5749d25ced30b12fab48e502363ccd61a0a019706"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "A simple demonstration of pivot_table. [Reshaping and Pivot Tables \u2014 pandas 0.13.1 documentation](http://pandas.pydata.org/pandas-docs/stable/reshaping.html)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "from pandas import DataFrame, Series"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df = DataFrame([{\n",
      "'year':1880,\n",
      "'name':'John',\n",
      "'sex': 'M',\n",
      "'births': 13\n",
      "},\n",
      "{'year':1880,\n",
      "'name':'Pat',\n",
      "'sex': 'M',\n",
      "'births': 13\n",
      "},\n",
      "{'year':1880,\n",
      "'name':'Pat',\n",
      "'sex': 'F',\n",
      "'births': 13\n",
      "},\n",
      "{\n",
      "'year':1880,\n",
      "'name':'Jane',\n",
      "'sex': 'F',\n",
      "'births': 20\n",
      "}, \n",
      "{\n",
      "'year':1881,\n",
      "'name':'John',\n",
      "'sex': 'M',\n",
      "'births': 90\n",
      "},\n",
      "{\n",
      "'year':1881,\n",
      "'name':'Jane',\n",
      "'sex': 'F',\n",
      "'births': 21\n",
      "},])\n",
      "\n",
      "df"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>births</th>\n",
        "      <th>name</th>\n",
        "      <th>sex</th>\n",
        "      <th>year</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 13</td>\n",
        "      <td> John</td>\n",
        "      <td> M</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 13</td>\n",
        "      <td>  Pat</td>\n",
        "      <td> M</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 13</td>\n",
        "      <td>  Pat</td>\n",
        "      <td> F</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 20</td>\n",
        "      <td> Jane</td>\n",
        "      <td> F</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 90</td>\n",
        "      <td> John</td>\n",
        "      <td> M</td>\n",
        "      <td> 1881</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td> 21</td>\n",
        "      <td> Jane</td>\n",
        "      <td> F</td>\n",
        "      <td> 1881</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>6 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 2,
       "text": [
        "   births  name sex  year\n",
        "0      13  John   M  1880\n",
        "1      13   Pat   M  1880\n",
        "2      13   Pat   F  1880\n",
        "3      20  Jane   F  1880\n",
        "4      90  John   M  1881\n",
        "5      21  Jane   F  1881\n",
        "\n",
        "[6 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pt  = df.pivot_table(rows='year', cols=['name','sex'])['births']\n",
      "pt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr>\n",
        "      <th>name</th>\n",
        "      <th>Jane</th>\n",
        "      <th>John</th>\n",
        "      <th colspan=\"2\" halign=\"left\">Pat</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>sex</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1880</th>\n",
        "      <td> 20</td>\n",
        "      <td> 13</td>\n",
        "      <td> 13</td>\n",
        "      <td> 13</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1881</th>\n",
        "      <td> 21</td>\n",
        "      <td> 90</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "name  Jane  John  Pat    \n",
        "sex      F     M    F   M\n",
        "year                     \n",
        "1880    20    13   13  13\n",
        "1881    21    90  NaN NaN\n",
        "\n",
        "[2 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# let's make a new table in which there is M/F subindex for all names\n",
      "\n",
      "names = set(pt.columns.get_level_values(level=0))\n",
      "sexes = set(pt.columns.get_level_values(level=1))\n",
      "names, sexes"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 4,
       "text": [
        "({'Jane', 'John', 'Pat'}, {'F', 'M'})"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# http://pandas.pydata.org/pandas-docs/stable/indexing.html#creating-a-multiindex-hierarchical-index-object\n",
      "\n",
      "new_index = pd.MultiIndex.from_product([list(names), list(sexes)],\n",
      "                           names=['name','sex'])\n",
      "new_index"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "MultiIndex(levels=[[u'Jane', u'John', u'Pat'], [u'F', u'M']],\n",
        "           labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]],\n",
        "           names=[u'name', u'sex'])"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html\n",
      "\n",
      "pt.T.reindex(new_index).T"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr>\n",
        "      <th>name</th>\n",
        "      <th colspan=\"2\" halign=\"left\">Jane</th>\n",
        "      <th colspan=\"2\" halign=\"left\">John</th>\n",
        "      <th colspan=\"2\" halign=\"left\">Pat</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>sex</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1880</th>\n",
        "      <td>NaN</td>\n",
        "      <td> 20</td>\n",
        "      <td> 13</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 13</td>\n",
        "      <td> 13</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1881</th>\n",
        "      <td>NaN</td>\n",
        "      <td> 21</td>\n",
        "      <td> 90</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "      <td>NaN</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 6 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "name  Jane      John      Pat    \n",
        "sex      M   F     M   F    M   F\n",
        "year                             \n",
        "1880   NaN  20    13 NaN   13  13\n",
        "1881   NaN  21    90 NaN  NaN NaN\n",
        "\n",
        "[2 rows x 6 columns]"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pt.T.reindex(new_index).T.fillna(0)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr>\n",
        "      <th>name</th>\n",
        "      <th colspan=\"2\" halign=\"left\">Jane</th>\n",
        "      <th colspan=\"2\" halign=\"left\">John</th>\n",
        "      <th colspan=\"2\" halign=\"left\">Pat</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>sex</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>1880</th>\n",
        "      <td> 0</td>\n",
        "      <td> 20</td>\n",
        "      <td> 13</td>\n",
        "      <td> 0</td>\n",
        "      <td> 13</td>\n",
        "      <td> 13</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1881</th>\n",
        "      <td> 0</td>\n",
        "      <td> 21</td>\n",
        "      <td> 90</td>\n",
        "      <td> 0</td>\n",
        "      <td>  0</td>\n",
        "      <td>  0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>2 rows \u00d7 6 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 7,
       "text": [
        "name  Jane      John     Pat    \n",
        "sex      M   F     M  F    M   F\n",
        "year                            \n",
        "1880     0  20    13  0   13  13\n",
        "1881     0  21    90  0    0   0\n",
        "\n",
        "[2 rows x 6 columns]"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum'))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th>births</th>\n",
        "      <th>year</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th>name</th>\n",
        "      <th>sex</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th rowspan=\"4\" valign=\"top\">1880</th>\n",
        "      <th>Jane</th>\n",
        "      <th>F</th>\n",
        "      <td> 20</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>John</th>\n",
        "      <th>M</th>\n",
        "      <td> 13</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th rowspan=\"2\" valign=\"top\">Pat</th>\n",
        "      <th>F</th>\n",
        "      <td> 13</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>M</th>\n",
        "      <td> 13</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th rowspan=\"2\" valign=\"top\">1881</th>\n",
        "      <th>Jane</th>\n",
        "      <th>F</th>\n",
        "      <td> 21</td>\n",
        "      <td> 1881</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>John</th>\n",
        "      <th>M</th>\n",
        "      <td> 90</td>\n",
        "      <td> 1881</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>6 rows \u00d7 2 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 19,
       "text": [
        "               births  year\n",
        "year name sex              \n",
        "1880 Jane F        20  1880\n",
        "     John M        13  1880\n",
        "     Pat  F        13  1880\n",
        "          M        13  1880\n",
        "1881 Jane F        21  1881\n",
        "     John M        90  1881\n",
        "\n",
        "[6 rows x 2 columns]"
       ]
      }
     ],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum')).unstack()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th colspan=\"2\" halign=\"left\">births</th>\n",
        "      <th colspan=\"2\" halign=\"left\">year</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th></th>\n",
        "      <th>sex</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "      <th>F</th>\n",
        "      <th>M</th>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>year</th>\n",
        "      <th>name</th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "      <th></th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th rowspan=\"3\" valign=\"top\">1880</th>\n",
        "      <th>Jane</th>\n",
        "      <td> 20</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 1880</td>\n",
        "      <td>  NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>John</th>\n",
        "      <td>NaN</td>\n",
        "      <td> 13</td>\n",
        "      <td>  NaN</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>Pat</th>\n",
        "      <td> 13</td>\n",
        "      <td> 13</td>\n",
        "      <td> 1880</td>\n",
        "      <td> 1880</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th rowspan=\"2\" valign=\"top\">1881</th>\n",
        "      <th>Jane</th>\n",
        "      <td> 21</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 1881</td>\n",
        "      <td>  NaN</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>John</th>\n",
        "      <td>NaN</td>\n",
        "      <td> 90</td>\n",
        "      <td>  NaN</td>\n",
        "      <td> 1881</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 4 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 25,
       "text": [
        "           births      year      \n",
        "sex             F   M     F     M\n",
        "year name                        \n",
        "1880 Jane      20 NaN  1880   NaN\n",
        "     John     NaN  13   NaN  1880\n",
        "     Pat       13  13  1880  1880\n",
        "1881 Jane      21 NaN  1881   NaN\n",
        "     John     NaN  90   NaN  1881\n",
        "\n",
        "[5 rows x 4 columns]"
       ]
      }
     ],
     "prompt_number": 25
    }
   ],
   "metadata": {}
  }
 ]
}