{ "metadata": { "name": "", "signature": "sha256:56d35a8b5a4b1d6d9786cef5749d25ced30b12fab48e502363ccd61a0a019706" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "A simple demonstration of pivot_table. [Reshaping and Pivot Tables \u2014 pandas 0.13.1 documentation](http://pandas.pydata.org/pandas-docs/stable/reshaping.html)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "from pandas import DataFrame, Series" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame([{\n", "'year':1880,\n", "'name':'John',\n", "'sex': 'M',\n", "'births': 13\n", "},\n", "{'year':1880,\n", "'name':'Pat',\n", "'sex': 'M',\n", "'births': 13\n", "},\n", "{'year':1880,\n", "'name':'Pat',\n", "'sex': 'F',\n", "'births': 13\n", "},\n", "{\n", "'year':1880,\n", "'name':'Jane',\n", "'sex': 'F',\n", "'births': 20\n", "}, \n", "{\n", "'year':1881,\n", "'name':'John',\n", "'sex': 'M',\n", "'births': 90\n", "},\n", "{\n", "'year':1881,\n", "'name':'Jane',\n", "'sex': 'F',\n", "'births': 21\n", "},])\n", "\n", "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
birthsnamesexyear
0 13 John M 1880
1 13 Pat M 1880
2 13 Pat F 1880
3 20 Jane F 1880
4 90 John M 1881
5 21 Jane F 1881
\n", "

6 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 2, "text": [ " births name sex year\n", "0 13 John M 1880\n", "1 13 Pat M 1880\n", "2 13 Pat F 1880\n", "3 20 Jane F 1880\n", "4 90 John M 1881\n", "5 21 Jane F 1881\n", "\n", "[6 rows x 4 columns]" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "pt = df.pivot_table(rows='year', cols=['name','sex'])['births']\n", "pt" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameJaneJohnPat
sexFMFM
year
1880 20 13 13 13
1881 21 90NaNNaN
\n", "

2 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 3, "text": [ "name Jane John Pat \n", "sex F M F M\n", "year \n", "1880 20 13 13 13\n", "1881 21 90 NaN NaN\n", "\n", "[2 rows x 4 columns]" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "# let's make a new table in which there is M/F subindex for all names\n", "\n", "names = set(pt.columns.get_level_values(level=0))\n", "sexes = set(pt.columns.get_level_values(level=1))\n", "names, sexes" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 4, "text": [ "({'Jane', 'John', 'Pat'}, {'F', 'M'})" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# http://pandas.pydata.org/pandas-docs/stable/indexing.html#creating-a-multiindex-hierarchical-index-object\n", "\n", "new_index = pd.MultiIndex.from_product([list(names), list(sexes)],\n", " names=['name','sex'])\n", "new_index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "MultiIndex(levels=[[u'Jane', u'John', u'Pat'], [u'F', u'M']],\n", " labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]],\n", " names=[u'name', u'sex'])" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html\n", "\n", "pt.T.reindex(new_index).T" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameJaneJohnPat
sexMFMFMF
year
1880NaN 20 13NaN 13 13
1881NaN 21 90NaNNaNNaN
\n", "

2 rows \u00d7 6 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "name Jane John Pat \n", "sex M F M F M F\n", "year \n", "1880 NaN 20 13 NaN 13 13\n", "1881 NaN 21 90 NaN NaN NaN\n", "\n", "[2 rows x 6 columns]" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "pt.T.reindex(new_index).T.fillna(0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameJaneJohnPat
sexMFMFMF
year
1880 0 20 13 0 13 13
1881 0 21 90 0 0 0
\n", "

2 rows \u00d7 6 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "name Jane John Pat \n", "sex M F M F M F\n", "year \n", "1880 0 20 13 0 13 13\n", "1881 0 21 90 0 0 0\n", "\n", "[2 rows x 6 columns]" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum'))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
birthsyear
yearnamesex
1880JaneF 20 1880
JohnM 13 1880
PatF 13 1880
M 13 1880
1881JaneF 21 1881
JohnM 90 1881
\n", "

6 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ " births year\n", "year name sex \n", "1880 Jane F 20 1880\n", " John M 13 1880\n", " Pat F 13 1880\n", " M 13 1880\n", "1881 Jane F 21 1881\n", " John M 90 1881\n", "\n", "[6 rows x 2 columns]" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "df.groupby('year').apply(lambda s: s.groupby(['name','sex']).agg('sum')).unstack()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
birthsyear
sexFMFM
yearname
1880Jane 20NaN 1880 NaN
JohnNaN 13 NaN 1880
Pat 13 13 1880 1880
1881Jane 21NaN 1881 NaN
JohnNaN 90 NaN 1881
\n", "

5 rows \u00d7 4 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ " births year \n", "sex F M F M\n", "year name \n", "1880 Jane 20 NaN 1880 NaN\n", " John NaN 13 NaN 1880\n", " Pat 13 13 1880 1880\n", "1881 Jane 21 NaN 1881 NaN\n", " John NaN 90 NaN 1881\n", "\n", "[5 rows x 4 columns]" ] } ], "prompt_number": 25 } ], "metadata": {} } ] }