{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# CHAPTER 7" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Wrangling: Clean, Transform, Merge, Reshape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combining and merging data sets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Database-style DataFrame merges" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "from pandas import DataFrame\n", "from pandas import Series\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n", " 'data1': range(7)})" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df2 = DataFrame({'key':['a', 'b', 'd'],\n", " 'data2': range(3)})" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
00b
11b
22a
33c
44a
55a
66b
\n", "
" ], "text/plain": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 a\n", "6 6 b" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2key
00a
11b
22d
\n", "
" ], "text/plain": [ " data2 key\n", "0 0 a\n", "1 1 b\n", "2 2 d" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00b1
11b1
26b1
32a0
44a0
55a0
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00b1
11b1
26b1
32a0
44a0
55a0
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0 b 1\n", "1 1 b 1\n", "2 6 b 1\n", "3 2 a 0\n", "4 4 a 0\n", "5 5 a 0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2, on = 'key')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df3 = DataFrame({'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],\n", " 'data1': range(7)})" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1lkey
00b
11b
22a
33c
44a
55a
66b
\n", "
" ], "text/plain": [ " data1 lkey\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 a\n", "6 6 b" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df4 = DataFrame({'rkey':['a', 'b', 'd'],\n", " 'data2': range(3)})" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2rkey
00a
11b
22d
\n", "
" ], "text/plain": [ " data2 rkey\n", "0 0 a\n", "1 1 b\n", "2 2 d" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1lkeydata2rkey
00b1b
11b1b
26b1b
32a0a
44a0a
55a0a
\n", "
" ], "text/plain": [ " data1 lkey data2 rkey\n", "0 0 b 1 b\n", "1 1 b 1 b\n", "2 6 b 1 b\n", "3 2 a 0 a\n", "4 4 a 0 a\n", "5 5 a 0 a" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df3, df4, left_on='lkey', right_on='rkey')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00.0b1.0
11.0b1.0
26.0b1.0
32.0a0.0
44.0a0.0
55.0a0.0
63.0cNaN
7NaNd2.0
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0.0 b 1.0\n", "1 1.0 b 1.0\n", "2 6.0 b 1.0\n", "3 2.0 a 0.0\n", "4 4.0 a 0.0\n", "5 5.0 a 0.0\n", "6 3.0 c NaN\n", "7 NaN d 2.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2, how = 'outer')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
00b
11b
22a
33c
44a
55b
\n", "
" ], "text/plain": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 b" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],\n", " 'data1': range(6)})\n", "df1" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data2key
00a
11b
22a
33b
44d
\n", "
" ], "text/plain": [ " data2 key\n", "0 0 a\n", "1 1 b\n", "2 2 a\n", "3 3 b\n", "4 4 d" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame({'key':['a', 'b', 'a', 'b', 'd'],\n", " 'data2': range(5)})\n", "df2" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00b1.0
10b3.0
21b1.0
31b3.0
42a0.0
52a2.0
63cNaN
74a0.0
84a2.0
95b1.0
105b3.0
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0 b 1.0\n", "1 0 b 3.0\n", "2 1 b 1.0\n", "3 1 b 3.0\n", "4 2 a 0.0\n", "5 2 a 2.0\n", "6 3 c NaN\n", "7 4 a 0.0\n", "8 4 a 2.0\n", "9 5 b 1.0\n", "10 5 b 3.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2, on = 'key', how = 'left')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00.0b1
11.0b1
25.0b1
30.0b3
41.0b3
55.0b3
62.0a0
74.0a0
82.0a2
94.0a2
10NaNd4
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0.0 b 1\n", "1 1.0 b 1\n", "2 5.0 b 1\n", "3 0.0 b 3\n", "4 1.0 b 3\n", "5 5.0 b 3\n", "6 2.0 a 0\n", "7 4.0 a 0\n", "8 2.0 a 2\n", "9 4.0 a 2\n", "10 NaN d 4" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2, on = 'key', how = 'right')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1keydata2
00b1
10b3
21b1
31b3
45b1
55b3
62a0
72a2
84a0
94a2
\n", "
" ], "text/plain": [ " data1 key data2\n", "0 0 b 1\n", "1 0 b 3\n", "2 1 b 1\n", "3 1 b 3\n", "4 5 b 1\n", "5 5 b 3\n", "6 2 a 0\n", "7 2 a 2\n", "8 4 a 0\n", "9 4 a 2" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df1, df2, on = 'key', how = 'inner')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lval
0fooone1
1footwo2
2barone3
\n", "
" ], "text/plain": [ " key1 key2 lval\n", "0 foo one 1\n", "1 foo two 2\n", "2 bar one 3" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left = DataFrame({'key1': ['foo', 'foo', 'bar'],\n", " 'key2': ['one', 'two', 'one'],\n", " 'lval': [1, 2, 3]})\n", "left" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2rval
0fooone4
1fooone5
2barone6
3bartwo7
\n", "
" ], "text/plain": [ " key1 key2 rval\n", "0 foo one 4\n", "1 foo one 5\n", "2 bar one 6\n", "3 bar two 7" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "right = DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],\n", " 'key2': ['one', 'one', 'one', 'two'],\n", " 'rval': [4, 5, 6, 7]})\n", "right" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lvalrval
0fooone1.04.0
1fooone1.05.0
2footwo2.0NaN
3barone3.06.0
4bartwoNaN7.0
\n", "
" ], "text/plain": [ " key1 key2 lval rval\n", "0 foo one 1.0 4.0\n", "1 foo one 1.0 5.0\n", "2 foo two 2.0 NaN\n", "3 bar one 3.0 6.0\n", "4 bar two NaN 7.0" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = ['key1', 'key2'], how = 'outer')" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lvalrval
0fooone14
1fooone15
2barone36
\n", "
" ], "text/plain": [ " key1 key2 lval rval\n", "0 foo one 1 4\n", "1 foo one 1 5\n", "2 bar one 3 6" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = ['key1', 'key2'], how = 'inner')" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lvalrval
0fooone14.0
1fooone15.0
2footwo2NaN
3barone36.0
\n", "
" ], "text/plain": [ " key1 key2 lval rval\n", "0 foo one 1 4.0\n", "1 foo one 1 5.0\n", "2 foo two 2 NaN\n", "3 bar one 3 6.0" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = ['key1', 'key2'], how = 'left')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2lvalrval
0fooone1.04
1fooone1.05
2barone3.06
3bartwoNaN7
\n", "
" ], "text/plain": [ " key1 key2 lval rval\n", "0 foo one 1.0 4\n", "1 foo one 1.0 5\n", "2 bar one 3.0 6\n", "3 bar two NaN 7" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = ['key1', 'key2'], how = 'right')" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2_xlvalkey2_yrval
0fooone1one4
1fooone1one5
2footwo2one4
3footwo2one5
4barone3one6
5barone3two7
\n", "
" ], "text/plain": [ " key1 key2_x lval key2_y rval\n", "0 foo one 1 one 4\n", "1 foo one 1 one 5\n", "2 foo two 2 one 4\n", "3 foo two 2 one 5\n", "4 bar one 3 one 6\n", "5 bar one 3 two 7" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = 'key1')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key1key2_leftlvalkey2_rightrval
0fooone1one4
1fooone1one5
2footwo2one4
3footwo2one5
4barone3one6
5barone3two7
\n", "
" ], "text/plain": [ " key1 key2_left lval key2_right rval\n", "0 foo one 1 one 4\n", "1 foo one 1 one 5\n", "2 foo two 2 one 4\n", "3 foo two 2 one 5\n", "4 bar one 3 one 6\n", "5 bar one 3 two 7" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left, right, on = 'key1', suffixes=('_left', '_right'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging on index" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvalue
0a0
1b1
2a2
3a3
4b4
5c5
\n", "
" ], "text/plain": [ " key value\n", "0 a 0\n", "1 b 1\n", "2 a 2\n", "3 a 3\n", "4 b 4\n", "5 c 5" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left1 = DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'],\n", " 'value': range(6)})\n", "left1" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
group_val
a3.5
b7.0
\n", "
" ], "text/plain": [ " group_val\n", "a 3.5\n", "b 7.0" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "right1 = DataFrame({'group_val': [3.5, 7]}, index = ['a', 'b'])\n", "right1" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0a03.5
2a23.5
3a33.5
1b17.0
4b47.0
\n", "
" ], "text/plain": [ " key value group_val\n", "0 a 0 3.5\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "1 b 1 7.0\n", "4 b 4 7.0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left1, right1, left_on='key', right_index = True)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0a03.5
2a23.5
3a33.5
1b17.0
4b47.0
5c5NaN
\n", "
" ], "text/plain": [ " key value group_val\n", "0 a 0 3.5\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "1 b 1 7.0\n", "4 b 4 7.0\n", "5 c 5 NaN" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left1, right1, left_on='key', right_index = True, how = 'outer')" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2
00.0Ohio2002
11.0Ohio2001
22.0Ohio2002
33.0Nevada2001
44.0Nevada2002
\n", "
" ], "text/plain": [ " data key1 key2\n", "0 0.0 Ohio 2002\n", "1 1.0 Ohio 2001\n", "2 2.0 Ohio 2002\n", "3 3.0 Nevada 2001\n", "4 4.0 Nevada 2002" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lefth = DataFrame({'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n", " 'key2': [2002, 2001, 2002, 2001, 2002],\n", " 'data': np.arange(5.)})\n", "lefth" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event1event2
Nevada200101
200023
Ohio200045
200067
200189
20021011
\n", "
" ], "text/plain": [ " event1 event2\n", "Nevada 2001 0 1\n", " 2000 2 3\n", "Ohio 2000 4 5\n", " 2000 6 7\n", " 2001 8 9\n", " 2002 10 11" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "righth = DataFrame(np.arange(12).reshape(6, 2),\n", " index = [['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],\n", " [2001, 2000, 2000, 2000, 2001, 2002]],\n", " columns = ['event1', 'event2'])\n", "righth" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2event1event2
00.0Ohio20021011
22.0Ohio20021011
11.0Ohio200189
33.0Nevada200101
\n", "
" ], "text/plain": [ " data key1 key2 event1 event2\n", "0 0.0 Ohio 2002 10 11\n", "2 2.0 Ohio 2002 10 11\n", "1 1.0 Ohio 2001 8 9\n", "3 3.0 Nevada 2001 0 1" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datakey1key2event1event2
00.0Ohio2002.010.011.0
22.0Ohio2002.010.011.0
11.0Ohio2001.08.09.0
33.0Nevada2001.00.01.0
44.0Nevada2002.0NaNNaN
4NaNNevada2000.02.03.0
4NaNOhio2000.04.05.0
4NaNOhio2000.06.07.0
\n", "
" ], "text/plain": [ " data key1 key2 event1 event2\n", "0 0.0 Ohio 2002.0 10.0 11.0\n", "2 2.0 Ohio 2002.0 10.0 11.0\n", "1 1.0 Ohio 2001.0 8.0 9.0\n", "3 3.0 Nevada 2001.0 0.0 1.0\n", "4 4.0 Nevada 2002.0 NaN NaN\n", "4 NaN Nevada 2000.0 2.0 3.0\n", "4 NaN Ohio 2000.0 4.0 5.0\n", "4 NaN Ohio 2000.0 6.0 7.0" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True, how = 'outer')" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevada
a1.02.0
c3.04.0
e5.06.0
\n", "
" ], "text/plain": [ " Ohio Nevada\n", "a 1.0 2.0\n", "c 3.0 4.0\n", "e 5.0 6.0" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left2 = DataFrame([[1., 2.], [3., 4.], [5., 6.]], index = ['a', 'c', 'e'],\n", " columns = ['Ohio', 'Nevada'])\n", "left2" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MissouriAlabama
b7.08.0
c9.010.0
d11.012.0
e13.014.0
\n", "
" ], "text/plain": [ " Missouri Alabama\n", "b 7.0 8.0\n", "c 9.0 10.0\n", "d 11.0 12.0\n", "e 13.0 14.0" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "right2 = DataFrame([[7., 8.], [9., 10.], [11., 12.], [13., 14]],\n", " index = ['b', 'c', 'd', 'e'],\n", " columns = ['Missouri', 'Alabama'])\n", "right2" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
a1.02.0NaNNaN
bNaNNaN7.08.0
c3.04.09.010.0
dNaNNaN11.012.0
e5.06.013.014.0
\n", "
" ], "text/plain": [ " Ohio Nevada Missouri Alabama\n", "a 1.0 2.0 NaN NaN\n", "b NaN NaN 7.0 8.0\n", "c 3.0 4.0 9.0 10.0\n", "d NaN NaN 11.0 12.0\n", "e 5.0 6.0 13.0 14.0" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(left2, right2, how = 'outer', left_index=True, right_index=True)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabama
a1.02.0NaNNaN
bNaNNaN7.08.0
c3.04.09.010.0
dNaNNaN11.012.0
e5.06.013.014.0
\n", "
" ], "text/plain": [ " Ohio Nevada Missouri Alabama\n", "a 1.0 2.0 NaN NaN\n", "b NaN NaN 7.0 8.0\n", "c 3.0 4.0 9.0 10.0\n", "d NaN NaN 11.0 12.0\n", "e 5.0 6.0 13.0 14.0" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left2.join(right2, how = 'outer')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
keyvaluegroup_val
0a03.5
1b17.0
2a23.5
3a33.5
4b47.0
5c5NaN
\n", "
" ], "text/plain": [ " key value group_val\n", "0 a 0 3.5\n", "1 b 1 7.0\n", "2 a 2 3.5\n", "3 a 3 3.5\n", "4 b 4 7.0\n", "5 c 5 NaN" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left1.join(right1, on = 'key')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
New YorkOregon
a7.08.0
c9.010.0
e11.012.0
f16.017.0
\n", "
" ], "text/plain": [ " New York Oregon\n", "a 7.0 8.0\n", "c 9.0 10.0\n", "e 11.0 12.0\n", "f 16.0 17.0" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "another = DataFrame([[7., 8.], [9., 10.], [11., 12.], [16., 17.]],\n", " index = ['a', 'c', 'e', 'f'],\n", " columns = ['New York', 'Oregon'])\n", "another" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabamaNew YorkOregon
a1.02.0NaNNaN7.08.0
c3.04.09.010.09.010.0
e5.06.013.014.011.012.0
\n", "
" ], "text/plain": [ " Ohio Nevada Missouri Alabama New York Oregon\n", "a 1.0 2.0 NaN NaN 7.0 8.0\n", "c 3.0 4.0 9.0 10.0 9.0 10.0\n", "e 5.0 6.0 13.0 14.0 11.0 12.0" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left2.join([right2, another])" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioNevadaMissouriAlabamaNew YorkOregon
a1.02.0NaNNaN7.08.0
bNaNNaN7.08.0NaNNaN
c3.04.09.010.09.010.0
dNaNNaN11.012.0NaNNaN
e5.06.013.014.011.012.0
fNaNNaNNaNNaN16.017.0
\n", "
" ], "text/plain": [ " Ohio Nevada Missouri Alabama New York Oregon\n", "a 1.0 2.0 NaN NaN 7.0 8.0\n", "b NaN NaN 7.0 8.0 NaN NaN\n", "c 3.0 4.0 9.0 10.0 9.0 10.0\n", "d NaN NaN 11.0 12.0 NaN NaN\n", "e 5.0 6.0 13.0 14.0 11.0 12.0\n", "f NaN NaN NaN NaN 16.0 17.0" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "left2.join([right2, another], how = 'outer')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Concatenating along an axis" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3],\n", " [ 4, 5, 6, 7],\n", " [ 8, 9, 10, 11]])" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr = np.arange(12).reshape(3, 4)\n", "arr" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2, 3, 0, 1, 2, 3],\n", " [ 4, 5, 6, 7, 4, 5, 6, 7],\n", " [ 8, 9, 10, 11, 8, 9, 10, 11]])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.concatenate([arr, arr], axis = 1)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 1\n", "dtype: int64" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = Series([0, 1], index = ['a', 'b'])\n", "s1" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "c 2\n", "d 3\n", "e 4\n", "dtype: int64" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2 = Series([2, 3, 4], index = ['c', 'd', 'e'])\n", "s2" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "f 5\n", "g 6\n", "dtype: int64" ] }, 
"execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s3 = Series([5, 6], index = ['f', 'g'])\n", "s3" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "e 4\n", "f 5\n", "g 6\n", "dtype: int64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s2, s3])" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
a0.0NaNNaN
b1.0NaNNaN
cNaN2.0NaN
dNaN3.0NaN
eNaN4.0NaN
fNaNNaN5.0
gNaNNaN6.0
\n", "
" ], "text/plain": [ " 0 1 2\n", "a 0.0 NaN NaN\n", "b 1.0 NaN NaN\n", "c NaN 2.0 NaN\n", "d NaN 3.0 NaN\n", "e NaN 4.0 NaN\n", "f NaN NaN 5.0\n", "g NaN NaN 6.0" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s2, s3], axis = 1)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 5\n", "f 5\n", "g 6\n", "dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s4 = pd.concat([s1*5, s3])\n", "s4" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a0.00
b1.05
fNaN5
gNaN6
\n", "
" ], "text/plain": [ " 0 1\n", "a 0.0 0\n", "b 1.0 5\n", "f NaN 5\n", "g NaN 6" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s4], axis = 1)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a00
b15
\n", "
" ], "text/plain": [ " 0 1\n", "a 0 0\n", "b 1 5" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s4], axis = 1, join = 'inner')" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
a0.00.0
cNaNNaN
b1.05.0
eNaNNaN
\n", "
" ], "text/plain": [ " 0 1\n", "a 0.0 0.0\n", "c NaN NaN\n", "b 1.0 5.0\n", "e NaN NaN" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# join_axes was removed in pandas 1.0; reindexing the outer-joined result selects the same rows\n", "pd.concat([s1, s4], axis = 1).reindex(['a', 'c', 'b', 'e'])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "e 4\n", "f 5\n", "g 6\n", "dtype: int64" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s2, s3])" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "one a 0\n", " b 1\n", "two a 0\n", " b 1\n", "three f 5\n", " g 6\n", "dtype: int64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = pd.concat([s1, s1, s3], keys = ['one', 'two', 'three'])\n", "result" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abfg
one0.01.0NaNNaN
two0.01.0NaNNaN
threeNaNNaN5.06.0
\n", "
" ], "text/plain": [ " a b f g\n", "one 0.0 1.0 NaN NaN\n", "two 0.0 1.0 NaN NaN\n", "three NaN NaN 5.0 6.0" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.unstack()" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothree
a0.00.0NaN
b1.01.0NaN
fNaNNaN5.0
gNaNNaN6.0
\n", "
" ], "text/plain": [ " one two three\n", "a 0.0 0.0 NaN\n", "b 1.0 1.0 NaN\n", "f NaN NaN 5.0\n", "g NaN NaN 6.0" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([s1, s1, s3], axis = 1, keys = ['one', 'two', 'three'])" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwo
a01
b23
c45
\n", "
" ], "text/plain": [ " one two\n", "a 0 1\n", "b 2 3\n", "c 4 5" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = DataFrame(np.arange(6).reshape(3, 2), index = ['a', 'b', 'c'], \n", " columns = ['one', 'two'])\n", "df1" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
threefour
a56
c78
\n", "
" ], "text/plain": [ " three four\n", "a 5 6\n", "c 7 8" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame(5 + np.arange(4).reshape(2, 2), index = ['a', 'c'],\n", " columns = ['three', 'four'])\n", "df2" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
level1level2
onetwothreefour
a015.06.0
b23NaNNaN
c457.08.0
\n", "
" ], "text/plain": [ " level1 level2 \n", " one two three four\n", "a 0 1 5.0 6.0\n", "b 2 3 NaN NaN\n", "c 4 5 7.0 8.0" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1, df2], axis = 1, keys = ['level1', 'level2'])" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
level1level2
onetwothreefour
a015.06.0
b23NaNNaN
c457.08.0
\n", "
" ], "text/plain": [ " level1 level2 \n", " one two three four\n", "a 0 1 5.0 6.0\n", "b 2 3 NaN NaN\n", "c 4 5 7.0 8.0" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat({'level1': df1, 'level2': df2}, axis = 1)" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fouronethreetwo
level1aNaN0.0NaN1.0
bNaN2.0NaN3.0
cNaN4.0NaN5.0
level2a6.0NaN5.0NaN
c8.0NaN7.0NaN
\n", "
" ], "text/plain": [ " four one three two\n", "level1 a NaN 0.0 NaN 1.0\n", " b NaN 2.0 NaN 3.0\n", " c NaN 4.0 NaN 5.0\n", "level2 a 6.0 NaN 5.0 NaN\n", " c 8.0 NaN 7.0 NaN" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat({'level1': df1, 'level2': df2})" ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
upperlevel1level2
loweronetwothreefour
a015.06.0
b23NaNNaN
c457.08.0
\n", "
" ], "text/plain": [ "upper level1 level2 \n", "lower one two three four\n", "a 0 1 5.0 6.0\n", "b 2 3 NaN NaN\n", "c 4 5 7.0 8.0" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1, df2], axis = 1, keys = ['level1', 'level2'], names = ['upper', 'lower'])" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0-0.819814-0.4315460.3550410.251668
11.6439740.080742-0.8070190.715589
2-0.5073601.320002-0.914798-0.004036
\n", "
" ], "text/plain": [ " a b c d\n", "0 -0.819814 -0.431546 0.355041 0.251668\n", "1 1.643974 0.080742 -0.807019 0.715589\n", "2 -0.507360 1.320002 -0.914798 -0.004036" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = DataFrame(np.random.randn(3, 4), columns = ['a', 'b', 'c', 'd'])\n", "df1" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bda
0-1.002213-0.562795-1.139098
11.360717-0.4825841.083755
\n", "
" ], "text/plain": [ " b d a\n", "0 -1.002213 -0.562795 -1.139098\n", "1 1.360717 -0.482584 1.083755" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame(np.random.randn(2, 3), columns = ['b', 'd', 'a'])\n", "df2" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0-0.819814-0.4315460.3550410.251668
11.6439740.080742-0.8070190.715589
2-0.5073601.320002-0.914798-0.004036
3-1.139098-1.002213NaN-0.562795
41.0837551.360717NaN-0.482584
\n", "
" ], "text/plain": [ " a b c d\n", "0 -0.819814 -0.431546 0.355041 0.251668\n", "1 1.643974 0.080742 -0.807019 0.715589\n", "2 -0.507360 1.320002 -0.914798 -0.004036\n", "3 -1.139098 -1.002213 NaN -0.562795\n", "4 1.083755 1.360717 NaN -0.482584" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.concat([df1, df2], ignore_index=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Combining data with overlap" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "f NaN\n", "e 2.5\n", "d NaN\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],\n", " index = ['f', 'e', 'd', 'c', 'b', 'a'])\n", "a" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "f 0.0\n", "e 1.0\n", "d 2.0\n", "c 3.0\n", "b 4.0\n", "a 5.0\n", "dtype: float64" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b = Series(np.arange(len(a), dtype = np.float64),\n", " index = ['f', 'e', 'd', 'c', 'b', 'a'])\n", "b" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# -1 is a position, not a label: go through .iloc so the last element is overwritten on every pandas version\n", "b.iloc[-1] = np.nan" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "f 0.0\n", "e 1.0\n", "d 2.0\n", "c 3.0\n", "b 4.0\n", "a NaN\n", "dtype: float64" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([ 0. , 2.5, 2. 
, 3.5, 4.5, nan])" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.where(pd.isnull(a), b, a)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "f 0.0\n", "e 1.0\n", "d 2.0\n", "c 3.0\n", "dtype: float64" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b[:-2]" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "d NaN\n", "c 3.5\n", "b 4.5\n", "a NaN\n", "dtype: float64" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a[2:]" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a NaN\n", "b 4.5\n", "c 3.0\n", "d 2.0\n", "e 1.0\n", "f 0.0\n", "dtype: float64" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "b[:-2].combine_first(a[2:])" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
01.0NaN2
1NaN2.06
25.0NaN10
3NaN6.014
\n", "
" ], "text/plain": [ " a b c\n", "0 1.0 NaN 2\n", "1 NaN 2.0 6\n", "2 5.0 NaN 10\n", "3 NaN 6.0 14" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = DataFrame({'a': [1., np.nan, 5, np.nan],\n", " 'b': [np.nan, 2., np.nan, 6],\n", " 'c': range(2, 18, 4)})\n", "df1" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
05.0NaN
14.03.0
2NaN4.0
33.06.0
47.08.0
\n", "
" ], "text/plain": [ " a b\n", "0 5.0 NaN\n", "1 4.0 3.0\n", "2 NaN 4.0\n", "3 3.0 6.0\n", "4 7.0 8.0" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = DataFrame({'a': [5., 4., np.nan, 3., 7.],\n", " 'b': [np.nan, 3., 4., 6., 8.]})\n", "df2" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
01.0NaN2.0
14.02.06.0
25.04.010.0
33.06.014.0
47.08.0NaN
\n", "
" ], "text/plain": [ " a b c\n", "0 1.0 NaN 2.0\n", "1 4.0 2.0 6.0\n", "2 5.0 4.0 10.0\n", "3 3.0 6.0 14.0\n", "4 7.0 8.0 NaN" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1.combine_first(df2)" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
05.0NaN2.0
14.03.06.0
25.04.010.0
33.06.014.0
47.08.0NaN
\n", "
" ], "text/plain": [ " a b c\n", "0 5.0 NaN 2.0\n", "1 4.0 3.0 6.0\n", "2 5.0 4.0 10.0\n", "3 3.0 6.0 14.0\n", "4 7.0 8.0 NaN" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.combine_first(df1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Reshaping and Pivoting" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Reshaping with hierarchical indexing" ] }, { "cell_type": "code", "execution_count": 79, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio012
Colorado345
\n", "
" ], "text/plain": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = DataFrame(np.arange(6).reshape(2, 3),\n", " index = pd.Index(['Ohio', 'Colorado'], name = 'state'),\n", " columns = pd.Index(['one', 'two', 'three'], name = 'number'))\n", "data" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "state number\n", "Ohio one 0\n", " two 1\n", " three 2\n", "Colorado one 3\n", " two 4\n", " three 5\n", "dtype: int64" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result = data.stack()\n", "result" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio012
Colorado345
\n", "
" ], "text/plain": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.unstack()" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
number
one03
two14
three25
\n", "
" ], "text/plain": [ "state Ohio Colorado\n", "number \n", "one 0 3\n", "two 1 4\n", "three 2 5" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.unstack(0)" ] }, { "cell_type": "code", "execution_count": 83, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
numberonetwothree
state
Ohio012
Colorado345
\n", "
" ], "text/plain": [ "number one two three\n", "state \n", "Ohio 0 1 2\n", "Colorado 3 4 5" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.unstack(1)" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
number
one03
two14
three25
\n", "
" ], "text/plain": [ "state Ohio Colorado\n", "number \n", "one 0 3\n", "two 1 4\n", "three 2 5" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.unstack('state')" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "dtype: int64" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s1 = Series([0, 1, 2, 3], index = ['a', 'b', 'c', 'd'])\n", "s1" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "c 4\n", "d 5\n", "e 6\n", "dtype: int64" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "s2 = Series([4, 5, 6], index = ['c', 'd', 'e'])\n", "s2" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "one a 0\n", " b 1\n", " c 2\n", " d 3\n", "two c 4\n", " d 5\n", " e 6\n", "dtype: int64" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2 = pd.concat([s1, s2], keys = ['one', 'two'])\n", "data2" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
one0.01.02.03.0NaN
twoNaNNaN4.05.06.0
\n", "
" ], "text/plain": [ " a b c d e\n", "one 0.0 1.0 2.0 3.0 NaN\n", "two NaN NaN 4.0 5.0 6.0" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2.unstack()" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "one a 0.0\n", " b 1.0\n", " c 2.0\n", " d 3.0\n", "two c 4.0\n", " d 5.0\n", " e 6.0\n", "dtype: float64" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2.unstack().stack()" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "one a 0.0\n", " b 1.0\n", " c 2.0\n", " d 3.0\n", " e NaN\n", "two a NaN\n", " b NaN\n", " c 4.0\n", " d 5.0\n", " e 6.0\n", "dtype: float64" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data2.unstack().stack(dropna = False)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sideleftright
statenumber
Ohioone05
two16
three27
Coloradoone38
two49
three510
\n", "
" ], "text/plain": [ "side left right\n", "state number \n", "Ohio one 0 5\n", " two 1 6\n", " three 2 7\n", "Colorado one 3 8\n", " two 4 9\n", " three 5 10" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = DataFrame({'left': result, 'right': result + 5},\n", " columns = pd.Index(['left', 'right'], name = 'side'))\n", "df" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sideleftright
stateOhioColoradoOhioColorado
number
one0358
two1469
three25710
\n", "
" ], "text/plain": [ "side left right \n", "state Ohio Colorado Ohio Colorado\n", "number \n", "one 0 3 5 8\n", "two 1 4 6 9\n", "three 2 5 7 10" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.unstack('state')" ] }, { "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
numberside
oneleft03
right58
twoleft14
right69
threeleft25
right710
\n", "
" ], "text/plain": [ "state Ohio Colorado\n", "number side \n", "one left 0 3\n", " right 5 8\n", "two left 1 4\n", " right 6 9\n", "three left 2 5\n", " right 7 10" ] }, "execution_count": 93, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.unstack('state').stack('side')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pivoting 'long' to 'wide' format" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data transformation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Removing duplicates" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2
0one1
1one1
2one2
3two3
4two3
5two4
6two4
\n", "
" ], "text/plain": [ " k1 k2\n", "0 one 1\n", "1 one 1\n", "2 one 2\n", "3 two 3\n", "4 two 3\n", "5 two 4\n", "6 two 4" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = DataFrame({'k1': ['one'] * 3 + ['two'] * 4,\n", " 'k2': [1, 1, 2, 3, 3, 4, 4]})\n", "data" ] }, { "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 True\n", "2 False\n", "3 False\n", "4 True\n", "5 False\n", "6 True\n", "dtype: bool" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.duplicated()" ] }, { "cell_type": "code", "execution_count": 96, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2
0one1
2one2
3two3
5two4
\n", "
" ], "text/plain": [ " k1 k2\n", "0 one 1\n", "2 one 2\n", "3 two 3\n", "5 two 4" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 97, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data['v1'] = range(7)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0one10
1one11
2one22
3two33
4two34
5two45
6two46
\n", "
" ], "text/plain": [ " k1 k2 v1\n", "0 one 1 0\n", "1 one 1 1\n", "2 one 2 2\n", "3 two 3 3\n", "4 two 3 4\n", "5 two 4 5\n", "6 two 4 6" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0one10
3two33
\n", "
" ], "text/plain": [ " k1 k2 v1\n", "0 one 1 0\n", "3 two 3 3" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.drop_duplicates(['k1'])" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
0one10
2one22
3two33
5two45
\n", "
" ], "text/plain": [ " k1 k2 v1\n", "0 one 1 0\n", "2 one 2 2\n", "3 two 3 3\n", "5 two 4 5" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.drop_duplicates(['k1', 'k2'])" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
k1k2v1
1one11
2one22
4two34
6two46
\n", "
" ], "text/plain": [ " k1 k2 v1\n", "1 one 1 1\n", "2 one 2 2\n", "4 two 3 4\n", "6 two 4 6" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.drop_duplicates(['k1', 'k2'], keep='last')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Transforming data using a function or mapping" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
foodounces
0bacon4.0
1pulled pork3.0
2bacon12.0
3Pastrami6.0
4corned beef7.5
5Bacon8.0
6pastrami3.0
7honey ham5.0
8nova lox6.0
\n", "
" ], "text/plain": [ " food ounces\n", "0 bacon 4.0\n", "1 pulled pork 3.0\n", "2 bacon 12.0\n", "3 Pastrami 6.0\n", "4 corned beef 7.5\n", "5 Bacon 8.0\n", "6 pastrami 3.0\n", "7 honey ham 5.0\n", "8 nova lox 6.0" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = DataFrame({'food': ['bacon', 'pulled pork', 'bacon', 'Pastrami',\n", " 'corned beef', 'Bacon', 'pastrami', 'honey ham',\n", " 'nova lox'],\n", " 'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})\n", "data" ] }, { "cell_type": "code", "execution_count": 111, "metadata": { "collapsed": false }, "outputs": [], "source": [ "meat_to_animal = {'bacon': 'pig',\n", " 'pulled pork': 'pig',\n", " 'pastrami': 'cow',\n", " 'corned beef': 'cow',\n", " 'honey ham': 'pig',\n", " 'nova lox': 'salmon'\n", " }" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
foodouncesanimal
0bacon4.0pig
1pulled pork3.0pig
2bacon12.0pig
3Pastrami6.0cow
4corned beef7.5cow
5Bacon8.0pig
6pastrami3.0cow
7honey ham5.0pig
8nova lox6.0salmon
\n", "
" ], "text/plain": [ " food ounces animal\n", "0 bacon 4.0 pig\n", "1 pulled pork 3.0 pig\n", "2 bacon 12.0 pig\n", "3 Pastrami 6.0 cow\n", "4 corned beef 7.5 cow\n", "5 Bacon 8.0 pig\n", "6 pastrami 3.0 cow\n", "7 honey ham 5.0 pig\n", "8 nova lox 6.0 salmon" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['animal'] = data['food'].map(str.lower).map(meat_to_animal)\n", "data" ] }, { "cell_type": "code", "execution_count": 113, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 pig\n", "1 pig\n", "2 pig\n", "3 cow\n", "4 cow\n", "5 pig\n", "6 cow\n", "7 pig\n", "8 salmon\n", "Name: food, dtype: object" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['food'].map(lambda x: meat_to_animal[x.lower()])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Replacing values" ] }, { "cell_type": "code", "execution_count": 114, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 -999.0\n", "2 2.0\n", "3 -999.0\n", "4 -1000.0\n", "5 3.0\n", "dtype: float64" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = Series([1., -999, 2, -999, -1000, 3])\n", "data" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 NaN\n", "2 2.0\n", "3 NaN\n", "4 -1000.0\n", "5 3.0\n", "dtype: float64" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.replace(-999, np.nan)" ] }, { "cell_type": "code", "execution_count": 116, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 NaN\n", "2 2.0\n", "3 NaN\n", "4 NaN\n", "5 3.0\n", "dtype: float64" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.replace([-999, -1000], np.nan)" ] }, { "cell_type": 
"code", "execution_count": 117, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 NaN\n", "2 2.0\n", "3 NaN\n", "4 0.0\n", "5 3.0\n", "dtype: float64" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.replace([-999, -1000], [np.nan, 0])" ] }, { "cell_type": "code", "execution_count": 118, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 1.0\n", "1 NaN\n", "2 2.0\n", "3 NaN\n", "4 0.0\n", "5 3.0\n", "dtype: float64" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.replace({-999: np.nan, -1000: 0})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Renaming Axis Indexes" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio0123
Colorado4567
New York891011
\n", "
" ], "text/plain": [ " one two three four\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "New York 8 9 10 11" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = DataFrame(np.arange(12).reshape(3, 4),\n", " index = ['Ohio', 'Colorado', 'New York'],\n", " columns = ['one', 'two', 'three', 'four'])\n", "data" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['Ohio', 'Colorado', 'New York'], dtype='object')" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.index" ] }, { "cell_type": "code", "execution_count": 121, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array(['OHIO', 'COLORADO', 'NEW YORK'], dtype=object)" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.index.map(str.upper)" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
OHIO0123
COLORADO4567
NEW YORK891011
\n", "
" ], "text/plain": [ " one two three four\n", "OHIO 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] }, "execution_count": 122, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.index = data.index.map(str.upper)\n", "data" ] }, { "cell_type": "code", "execution_count": 123, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ONETWOTHREEFOUR
Ohio0123
Colorado4567
New York891011
\n", "
" ], "text/plain": [ " ONE TWO THREE FOUR\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "New York 8 9 10 11" ] }, "execution_count": 123, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.rename(index = str.title, columns = str.upper)" ] }, { "cell_type": "code", "execution_count": 124, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwopeekaboofour
INDIANA0123
COLORADO4567
NEW YORK891011
\n", "
" ], "text/plain": [ " one two peekaboo four\n", "INDIANA 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] }, "execution_count": 124, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.rename(index = {'OHIO': 'INDIANA'},\n", " columns = {'three': 'peekaboo'})" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
INDIANA0123
COLORADO4567
NEW YORK891011
\n", "
" ], "text/plain": [ " one two three four\n", "INDIANA 0 1 2 3\n", "COLORADO 4 5 6 7\n", "NEW YORK 8 9 10 11" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_ = data.rename(index = {'OHIO': 'INDIANA'}, inplace = True)\n", "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Discretization and Bining" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ages = [20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false }, "outputs": [], "source": [ "bins = [18, 25, 35, 60, 100]" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(18, 25], (18, 25], (18, 25], (25, 35], (18, 25], ..., (25, 35], (60, 100], (35, 60], (35, 60], (25, 35]]\n", "Length: 12\n", "Categories (4, object): [(18, 25] < (25, 35] < (35, 60] < (60, 100]]" ] }, "execution_count": 128, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cats = pd.cut(ages, bins)\n", "cats" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 1, 0, 0, 2, 1, 3, 2, 2, 1], dtype=int8)" ] }, "execution_count": 130, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cats.codes" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['(18, 25]', '(25, 35]', '(35, 60]', '(60, 100]'], dtype='object')" ] }, "execution_count": 132, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cats.categories" ] }, { "cell_type": "code", "execution_count": 133, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(18, 25] 5\n", "(35, 60] 3\n", "(25, 35] 3\n", "(60, 100] 1\n", "dtype: int64" ] }, "execution_count": 133, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.value_counts(cats)" ] }, { "cell_type": "code", "execution_count": 134, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[[18, 25), [18, 25), [25, 35), [25, 35), [18, 25), ..., [25, 35), [60, 100), [35, 60), [35, 60), [25, 35)]\n", "Length: 12\n", "Categories (4, object): [[18, 25) < [25, 35) < [35, 60) < [60, 100)]" ] }, "execution_count": 134, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.cut(ages, [18, 25, 35, 60, 100], right = False)" ] }, { "cell_type": "code", "execution_count": 135, "metadata": { "collapsed": false }, "outputs": [], "source": [ "group_names = ['Youth', 'YoungAdult', 'MiddleAge', 'Senior']" ] }, { "cell_type": "code", "execution_count": 136, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[Youth, Youth, Youth, YoungAdult, Youth, ..., YoungAdult, Senior, MiddleAge, MiddleAge, YoungAdult]\n", "Length: 12\n", "Categories (4, object): [Youth < YoungAdult < MiddleAge < Senior]" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.cut(ages, bins, labels = group_names)" ] }, { "cell_type": "code", "execution_count": 137, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([-0.74424396, 0.2066452 , 1.86474886, -0.70776676, 0.62818737,\n", " 1.03842445, -0.50529569, 0.55092385, 2.22731445, 1.75860623,\n", " -0.00800918, 1.04578221, 1.11294619, -1.23562583, 0.87561248,\n", " 0.30704202, 0.17894525, -1.43081915, -1.20560094, -1.1775395 ])" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = np.random.randn(20)\n", "data" ] }, { "cell_type": "code", "execution_count": 138, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(-1.43, -0.52], (-0.52, 0.4], (1.31, 2.23], (-1.43, -0.52], (0.4, 1.31], ..., (-0.52, 0.4], (-0.52, 0.4], (-1.43, -0.52], (-1.43, 
-0.52], (-1.43, -0.52]]\n", "Length: 20\n", "Categories (4, object): [(-1.43, -0.52] < (-0.52, 0.4] < (0.4, 1.31] < (1.31, 2.23]]" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.cut(data, 4, precision = 2)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = np.random.randn(1000)" ] }, { "cell_type": "code", "execution_count": 140, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(-0.0212, 0.668], (-0.0212, 0.668], (0.668, 3.227], (-0.0212, 0.668], (0.668, 3.227], ..., [-3.0222, -0.65], (0.668, 3.227], [-3.0222, -0.65], (0.668, 3.227], [-3.0222, -0.65]]\n", "Length: 1000\n", "Categories (4, object): [[-3.0222, -0.65] < (-0.65, -0.0212] < (-0.0212, 0.668] < (0.668, 3.227]]" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cats = pd.qcut(data, 4)\n", "cats" ] }, { "cell_type": "code", "execution_count": 141, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(0.668, 3.227] 250\n", "(-0.0212, 0.668] 250\n", "(-0.65, -0.0212] 250\n", "[-3.0222, -0.65] 250\n", "dtype: int64" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.value_counts(cats)" ] }, { "cell_type": "code", "execution_count": 142, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(-0.0212, 1.303], (-0.0212, 1.303], (1.303, 3.227], (-0.0212, 1.303], (1.303, 3.227], ..., (-1.24, -0.0212], (-0.0212, 1.303], (-1.24, -0.0212], (1.303, 3.227], [-3.0222, -1.24]]\n", "Length: 1000\n", "Categories (4, object): [[-3.0222, -1.24] < (-1.24, -0.0212] < (-0.0212, 1.303] < (1.303, 3.227]]" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.qcut(data, [0, 0.1, 0.5, 0.9, 1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Detecting and filtering outliers" ] }, { 
"cell_type": "code", "execution_count": 143, "metadata": { "collapsed": false }, "outputs": [], "source": [ "np.random.seed(12345)" ] }, { "cell_type": "code", "execution_count": 144, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = DataFrame(np.random.randn(1000, 4))" ] }, { "cell_type": "code", "execution_count": 145, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
count1000.0000001000.0000001000.0000001000.000000
mean-0.0676840.0679240.025598-0.002298
std0.9980350.9921061.0068350.996794
min-3.428254-3.548824-3.184377-3.745356
25%-0.774890-0.591841-0.641675-0.644144
50%-0.1164010.1011430.002073-0.013611
75%0.6163660.7802820.6803910.654328
max3.3666262.6536563.2603833.927528
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "count 1000.000000 1000.000000 1000.000000 1000.000000\n", "mean -0.067684 0.067924 0.025598 -0.002298\n", "std 0.998035 0.992106 1.006835 0.996794\n", "min -3.428254 -3.548824 -3.184377 -3.745356\n", "25% -0.774890 -0.591841 -0.641675 -0.644144\n", "50% -0.116401 0.101143 0.002073 -0.013611\n", "75% 0.616366 0.780282 0.680391 0.654328\n", "max 3.366626 2.653656 3.260383 3.927528" ] }, "execution_count": 145, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.describe()" ] }, { "cell_type": "code", "execution_count": 146, "metadata": { "collapsed": false }, "outputs": [], "source": [ "col = data[3]" ] }, { "cell_type": "code", "execution_count": 147, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "97 3.927528\n", "305 -3.399312\n", "400 -3.745356\n", "Name: 3, dtype: float64" ] }, "execution_count": 147, "metadata": {}, "output_type": "execute_result" } ], "source": [ "col[np.abs(col) > 3]" ] }, { "cell_type": "code", "execution_count": 148, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
5-0.5397410.4769853.248944-1.021228
97-0.7743630.5529360.1060613.927528
102-0.655054-0.5652303.1768730.959533
305-2.3155550.457246-0.025907-3.399312
3240.0501881.9513123.2603830.963301
4000.1463260.508391-0.196713-3.745356
499-0.293333-0.242459-3.0569901.918403
523-3.428254-0.296336-0.439938-0.867165
5860.2751441.179227-3.1843771.369891
808-0.362528-3.5488241.553205-2.186301
9003.366626-2.3722140.8510101.332846
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "5 -0.539741 0.476985 3.248944 -1.021228\n", "97 -0.774363 0.552936 0.106061 3.927528\n", "102 -0.655054 -0.565230 3.176873 0.959533\n", "305 -2.315555 0.457246 -0.025907 -3.399312\n", "324 0.050188 1.951312 3.260383 0.963301\n", "400 0.146326 0.508391 -0.196713 -3.745356\n", "499 -0.293333 -0.242459 -3.056990 1.918403\n", "523 -3.428254 -0.296336 -0.439938 -0.867165\n", "586 0.275144 1.179227 -3.184377 1.369891\n", "808 -0.362528 -3.548824 1.553205 -2.186301\n", "900 3.366626 -2.372214 0.851010 1.332846" ] }, "execution_count": 148, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[(np.abs(data) > 3).any(1)]" ] }, { "cell_type": "code", "execution_count": 149, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data[np.abs(data) > 3] = np.sign(data) * 3" ] }, { "cell_type": "code", "execution_count": 150, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
count1000.0000001000.0000001000.0000001000.000000
mean-0.0676230.0684730.025153-0.002081
std0.9954850.9902531.0039770.989736
min-3.000000-3.000000-3.000000-3.000000
25%-0.774890-0.591841-0.641675-0.644144
50%-0.1164010.1011430.002073-0.013611
75%0.6163660.7802820.6803910.654328
max3.0000002.6536563.0000003.000000
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "count 1000.000000 1000.000000 1000.000000 1000.000000\n", "mean -0.067623 0.068473 0.025153 -0.002081\n", "std 0.995485 0.990253 1.003977 0.989736\n", "min -3.000000 -3.000000 -3.000000 -3.000000\n", "25% -0.774890 -0.591841 -0.641675 -0.644144\n", "50% -0.116401 0.101143 0.002073 -0.013611\n", "75% 0.616366 0.780282 0.680391 0.654328\n", "max 3.000000 2.653656 3.000000 3.000000" ] }, "execution_count": 150, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Permutation and Random Sampling" ] }, { "cell_type": "code", "execution_count": 151, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
00123
14567
2891011
312131415
416171819
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 0 1 2 3\n", "1 4 5 6 7\n", "2 8 9 10 11\n", "3 12 13 14 15\n", "4 16 17 18 19" ] }, "execution_count": 151, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = DataFrame(np.arange(5 * 4).reshape(5, 4))\n", "df" ] }, { "cell_type": "code", "execution_count": 152, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([1, 0, 2, 3, 4])" ] }, "execution_count": 152, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sampler = np.random.permutation(5)\n", "sampler" ] }, { "cell_type": "code", "execution_count": 153, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
14567
00123
2891011
312131415
416171819
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "1 4 5 6 7\n", "0 0 1 2 3\n", "2 8 9 10 11\n", "3 12 13 14 15\n", "4 16 17 18 19" ] }, "execution_count": 153, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.take(sampler)" ] }, { "cell_type": "code", "execution_count": 154, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
14567
312131415
416171819
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "1 4 5 6 7\n", "3 12 13 14 15\n", "4 16 17 18 19" ] }, "execution_count": 154, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.take(np.random.permutation(len(df))[:3])" ] }, { "cell_type": "code", "execution_count": 155, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([ 5, 7, -1, 6, 4])" ] }, "execution_count": 155, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bag = np.array([5, 7, -1, 6, 4])\n", "bag" ] }, { "cell_type": "code", "execution_count": 156, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([4, 4, 2, 2, 2, 0, 3, 0, 4, 1])" ] }, "execution_count": 156, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sampler = np.random.randint(0, len(bag), size = 10)\n", "sampler" ] }, { "cell_type": "code", "execution_count": 157, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([ 4, 4, -1, -1, -1, 5, 6, 5, 4, 7])" ] }, "execution_count": 157, "metadata": {}, "output_type": "execute_result" } ], "source": [ "draw = bag.take(sampler)\n", "draw" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Computing Indicator/Dummy Variables" ] }, { "cell_type": "code", "execution_count": 158, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key
00b
11b
22a
33c
44a
55b
\n", "
" ], "text/plain": [ " data1 key\n", "0 0 b\n", "1 1 b\n", "2 2 a\n", "3 3 c\n", "4 4 a\n", "5 5 b" ] }, "execution_count": 158, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],\n", " 'data1': range(6)})\n", "df" ] }, { "cell_type": "code", "execution_count": 159, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
00.01.00.0
10.01.00.0
21.00.00.0
30.00.01.0
41.00.00.0
50.01.00.0
\n", "
" ], "text/plain": [ " a b c\n", "0 0.0 1.0 0.0\n", "1 0.0 1.0 0.0\n", "2 1.0 0.0 0.0\n", "3 0.0 0.0 1.0\n", "4 1.0 0.0 0.0\n", "5 0.0 1.0 0.0" ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.get_dummies(df['key'])" ] }, { "cell_type": "code", "execution_count": 160, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
key_akey_bkey_c
00.01.00.0
10.01.00.0
21.00.00.0
30.00.01.0
41.00.00.0
50.01.00.0
\n", "
" ], "text/plain": [ " key_a key_b key_c\n", "0 0.0 1.0 0.0\n", "1 0.0 1.0 0.0\n", "2 1.0 0.0 0.0\n", "3 0.0 0.0 1.0\n", "4 1.0 0.0 0.0\n", "5 0.0 1.0 0.0" ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummies = pd.get_dummies(df['key'], prefix = 'key')\n", "dummies" ] }, { "cell_type": "code", "execution_count": 161, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 2\n", "3 3\n", "4 4\n", "5 5\n", "dtype: int64" ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data1 = Series(range(6))\n", "data1" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data1key_akey_bkey_c
000.01.00.0
110.01.00.0
221.00.00.0
330.00.01.0
441.00.00.0
550.01.00.0
\n", "
" ], "text/plain": [ " data1 key_a key_b key_c\n", "0 0 0.0 1.0 0.0\n", "1 1 0.0 1.0 0.0\n", "2 2 1.0 0.0 0.0\n", "3 3 0.0 0.0 1.0\n", "4 4 1.0 0.0 0.0\n", "5 5 0.0 1.0 0.0" ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_with_dummy = df[['data1']].join(dummies)\n", "df_with_dummy" ] }, { "cell_type": "code", "execution_count": 163, "metadata": { "collapsed": false }, "outputs": [], "source": [ "mnames = ['movie_id', 'title', 'genres']" ] }, { "cell_type": "code", "execution_count": 165, "metadata": { "collapsed": false }, "outputs": [], "source": [ "movies = pd.read_table('ch07/movies.dat', sep = '::', header = None, names = mnames, engine='python')" ] }, { "cell_type": "code", "execution_count": 166, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idtitlegenres
01Toy Story (1995)Animation|Children's|Comedy
12Jumanji (1995)Adventure|Children's|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
34Waiting to Exhale (1995)Comedy|Drama
45Father of the Bride Part II (1995)Comedy
56Heat (1995)Action|Crime|Thriller
67Sabrina (1995)Comedy|Romance
78Tom and Huck (1995)Adventure|Children's
89Sudden Death (1995)Action
910GoldenEye (1995)Action|Adventure|Thriller
\n", "
" ], "text/plain": [ " movie_id title genres\n", "0 1 Toy Story (1995) Animation|Children's|Comedy\n", "1 2 Jumanji (1995) Adventure|Children's|Fantasy\n", "2 3 Grumpier Old Men (1995) Comedy|Romance\n", "3 4 Waiting to Exhale (1995) Comedy|Drama\n", "4 5 Father of the Bride Part II (1995) Comedy\n", "5 6 Heat (1995) Action|Crime|Thriller\n", "6 7 Sabrina (1995) Comedy|Romance\n", "7 8 Tom and Huck (1995) Adventure|Children's\n", "8 9 Sudden Death (1995) Action\n", "9 10 GoldenEye (1995) Action|Adventure|Thriller" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies[: 10]" ] }, { "cell_type": "code", "execution_count": 167, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ " at 0x10cce41a8>" ] }, "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ "genre_iter = (set(x.split('|')) for x in movies.genres)\n", "genre_iter" ] }, { "cell_type": "code", "execution_count": 168, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['Action',\n", " 'Adventure',\n", " 'Animation',\n", " \"Children's\",\n", " 'Comedy',\n", " 'Crime',\n", " 'Documentary',\n", " 'Drama',\n", " 'Fantasy',\n", " 'Film-Noir']" ] }, "execution_count": 168, "metadata": {}, "output_type": "execute_result" } ], "source": [ "genres = sorted(set.union(*genre_iter))\n", "genres[: 10]" ] }, { "cell_type": "code", "execution_count": 169, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ActionAdventureAnimationChildren'sComedyCrimeDocumentaryDramaFantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
20.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
30.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
40.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.0
\n", "
" ], "text/plain": [ " Action Adventure Animation Children's Comedy Crime Documentary \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " Drama Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " Thriller War Western \n", "0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 " ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dummies = DataFrame(np.zeros((len(movies), len(genres))), columns = genres)\n", "dummies.head()" ] }, { "cell_type": "code", "execution_count": 170, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for i, gen in enumerate(movies.genres):\n", " dummies.ix[i, gen.split('|')] = 1" ] }, { "cell_type": "code", "execution_count": 171, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "movie_id 1\n", "title Toy Story (1995)\n", "genres Animation|Children's|Comedy\n", "Genre_Action 0\n", "Genre_Adventure 0\n", "Genre_Animation 1\n", "Genre_Children's 1\n", "Genre_Comedy 1\n", "Genre_Crime 0\n", "Genre_Documentary 0\n", "Genre_Drama 0\n", "Genre_Fantasy 0\n", "Genre_Film-Noir 0\n", "Genre_Horror 0\n", "Genre_Musical 0\n", "Genre_Mystery 0\n", "Genre_Romance 0\n", "Genre_Sci-Fi 0\n", "Genre_Thriller 0\n", "Genre_War 0\n", "Genre_Western 0\n", "Name: 0, dtype: object" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies_windic = movies.join(dummies.add_prefix('Genre_'))\n", "movies_windic.ix[0]" ] }, { "cell_type": "code", "execution_count": 172, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ 
"array([ 0.75603383, 0.90830844, 0.96588737, 0.17373658, 0.87592824,\n", " 0.75415641, 0.163486 , 0.23784062, 0.85564381, 0.58743194])" ] }, "execution_count": 172, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values = np.random.rand(10)\n", "values" ] }, { "cell_type": "code", "execution_count": 173, "metadata": { "collapsed": false }, "outputs": [], "source": [ "bins = [0, 0.2, 0.4, 0.6, 0.8, 1]" ] }, { "cell_type": "code", "execution_count": 174, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
(0, 0.2](0.2, 0.4](0.4, 0.6](0.6, 0.8](0.8, 1]
00.00.00.01.00.0
10.00.00.00.01.0
20.00.00.00.01.0
31.00.00.00.00.0
40.00.00.00.01.0
50.00.00.01.00.0
61.00.00.00.00.0
70.01.00.00.00.0
80.00.00.00.01.0
90.00.01.00.00.0
\n", "
" ], "text/plain": [ " (0, 0.2] (0.2, 0.4] (0.4, 0.6] (0.6, 0.8] (0.8, 1]\n", "0 0.0 0.0 0.0 1.0 0.0\n", "1 0.0 0.0 0.0 0.0 1.0\n", "2 0.0 0.0 0.0 0.0 1.0\n", "3 1.0 0.0 0.0 0.0 0.0\n", "4 0.0 0.0 0.0 0.0 1.0\n", "5 0.0 0.0 0.0 1.0 0.0\n", "6 1.0 0.0 0.0 0.0 0.0\n", "7 0.0 1.0 0.0 0.0 0.0\n", "8 0.0 0.0 0.0 0.0 1.0\n", "9 0.0 0.0 1.0 0.0 0.0" ] }, "execution_count": 174, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.get_dummies(pd.cut(values, bins))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## String Manipulation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### String object methods" ] }, { "cell_type": "code", "execution_count": 175, "metadata": { "collapsed": false }, "outputs": [], "source": [ "val = 'a,b, guido'" ] }, { "cell_type": "code", "execution_count": 176, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['a', 'b', ' guido']" ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.split(',')" ] }, { "cell_type": "code", "execution_count": 177, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['a', 'b', 'guido']" ] }, "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pieces = [x.strip() for x in val.split(',')]\n", "pieces" ] }, { "cell_type": "code", "execution_count": 178, "metadata": { "collapsed": false }, "outputs": [], "source": [ "first, second, third = pieces" ] }, { "cell_type": "code", "execution_count": 179, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'a::b::guido'" ] }, "execution_count": 179, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first + '::' + second + '::' + third" ] }, { "cell_type": "code", "execution_count": 180, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'a::b::guido'" ] }, "execution_count": 180, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "'::'.join(pieces)" ] }, { "cell_type": "code", "execution_count": 181, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'guido' in val" ] }, { "cell_type": "code", "execution_count": 182, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.index(',')" ] }, { "cell_type": "code", "execution_count": 183, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "-1" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.find(':')" ] }, { "cell_type": "code", "execution_count": 184, "metadata": { "collapsed": false }, "outputs": [ { "ename": "ValueError", "evalue": "substring not found", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mval\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m':'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mValueError\u001b[0m: substring not found" ] } ], "source": [ "val.index(':')" ] }, { "cell_type": "code", "execution_count": 185, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.count(',')" ] }, { "cell_type": "code", "execution_count": 186, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'a::b:: guido'" ] }, "execution_count": 186, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "val.replace(',', '::')" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'ab guido'" ] }, "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ "val.replace(',', '')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Regular expression" ] }, { "cell_type": "code", "execution_count": 188, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import re" ] }, { "cell_type": "code", "execution_count": 189, "metadata": { "collapsed": false }, "outputs": [], "source": [ "text = 'foo bar\\t baz \\tqux'" ] }, { "cell_type": "code", "execution_count": 190, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['foo', 'bar', 'baz', 'qux']" ] }, "execution_count": 190, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re.split('\\s+', text)" ] }, { "cell_type": "code", "execution_count": 191, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['foo', 'bar', 'baz', 'qux']" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regex = re.compile('\\s+')\n", "regex.split(text)" ] }, { "cell_type": "code", "execution_count": 192, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[' ', '\\t ', ' \\t']" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regex.findall(text)" ] }, { "cell_type": "code", "execution_count": 195, "metadata": { "collapsed": true }, "outputs": [], "source": [ "text = \"\"\"Dave dave@google.com \n", "Steve steve@gmail.com\n", "Rob rob@gmail.com\n", "Ryan ryan@yahoo.com\n", "\"\"\"\n", "pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}'\n", "# re.IGNORECASE makes the regex case-insensitive \n", "regex = re.compile(pattern, flags=re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": 196, "metadata": { "collapsed": false 
}, "outputs": [ { "data": { "text/plain": [ "['dave@google.com', 'steve@gmail.com', 'rob@gmail.com', 'ryan@yahoo.com']" ] }, "execution_count": 196, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regex.findall(text)" ] }, { "cell_type": "code", "execution_count": 197, "metadata": { "collapsed": true }, "outputs": [], "source": [ "m = regex.search(text)" ] }, { "cell_type": "code", "execution_count": 198, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "<_sre.SRE_Match object; span=(5, 20), match='dave@google.com'>" ] }, "execution_count": 198, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] }, { "cell_type": "code", "execution_count": 199, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'dave@google.com'" ] }, "execution_count": 199, "metadata": {}, "output_type": "execute_result" } ], "source": [ "text[m.start(): m.end()]" ] }, { "cell_type": "code", "execution_count": 201, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n" ] } ], "source": [ "print (regex.match(text))" ] }, { "cell_type": "code", "execution_count": 202, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dave REDACTED \n", "Steve REDACTED\n", "Rob REDACTED\n", "Ryan REDACTED\n", "\n" ] } ], "source": [ "print (regex.sub('REDACTED', text))" ] }, { "cell_type": "code", "execution_count": 203, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\.([A-Z]{2,4})'" ] }, { "cell_type": "code", "execution_count": 204, "metadata": { "collapsed": true }, "outputs": [], "source": [ "regex = re.compile(pattern, flags=re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": 205, "metadata": { "collapsed": true }, "outputs": [], "source": [ "m = regex.match('wesm@bright.net')" ] }, { "cell_type": "code", "execution_count": 206, "metadata": 
{ "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'wesm@bright.net'" ] }, "execution_count": 206, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.group()" ] }, { "cell_type": "code", "execution_count": 207, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "('wesm', 'bright', 'net')" ] }, "execution_count": 207, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.groups()" ] }, { "cell_type": "code", "execution_count": 208, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[('dave', 'google', 'com'),\n", " ('steve', 'gmail', 'com'),\n", " ('rob', 'gmail', 'com'),\n", " ('ryan', 'yahoo', 'com')]" ] }, "execution_count": 208, "metadata": {}, "output_type": "execute_result" } ], "source": [ "regex.findall(text)" ] }, { "cell_type": "code", "execution_count": 209, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dave Username: dave, Domain: google, Suffix: com \n", "Steve Username: steve, Domain: gmail, Suffix: com\n", "Rob Username: rob, Domain: gmail, Suffix: com\n", "Ryan Username: ryan, Domain: yahoo, Suffix: com\n", "\n" ] } ], "source": [ "print (regex.sub(r'Username: \\1, Domain: \\2, Suffix: \\3', text))" ] }, { "cell_type": "code", "execution_count": 210, "metadata": { "collapsed": true }, "outputs": [], "source": [ "regex = re.compile(r\"\"\"\n", "(?P<username>[A-Z0-9._%+-]+)\n", "@\n", "(?P<domain>[A-Z0-9.-]+)\n", "\\.\n", "(?P<suffix>[A-Z]{2,4})\"\"\", flags=re.IGNORECASE|re.VERBOSE)" ] }, { "cell_type": "code", "execution_count": 211, "metadata": { "collapsed": true }, "outputs": [], "source": [ "m = regex.match('wesm@bright.net')" ] }, { "cell_type": "code", "execution_count": 212, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'domain': 'bright', 'suffix': 'net', 'username': 'wesm'}" ] }, "execution_count": 212, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"m.groupdict()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Vectorized string functions in pandas" ] }, { "cell_type": "code", "execution_count": 213, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data = {'Dave': 'dave@google.com', 'Steve': 'steve@gmail.com', 'Rob': 'rob@gmail.com', 'Wes': np.nan}" ] }, { "cell_type": "code", "execution_count": 214, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data = Series(data)" ] }, { "cell_type": "code", "execution_count": 215, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave dave@google.com\n", "Rob rob@gmail.com\n", "Steve steve@gmail.com\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 215, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 216, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave False\n", "Rob False\n", "Steve False\n", "Wes True\n", "dtype: bool" ] }, "execution_count": 216, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull()" ] }, { "cell_type": "code", "execution_count": 217, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave False\n", "Rob True\n", "Steve True\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 217, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.str.contains('gmail')" ] }, { "cell_type": "code", "execution_count": 218, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\\\\.([A-Z]{2,4})'" ] }, "execution_count": 218, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pattern" ] }, { "cell_type": "code", "execution_count": 219, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave [(dave, google, com)]\n", "Rob [(rob, gmail, com)]\n", "Steve [(steve, gmail, com)]\n", "Wes NaN\n", "dtype: object" ] }, 
"execution_count": 219, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.str.findall(pattern, flags=re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": 220, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: In future versions of pandas, match will change to always return a bool indexer.\n", " if __name__ == '__main__':\n" ] } ], "source": [ "matches = data.str.match(pattern, flags=re.IGNORECASE)" ] }, { "cell_type": "code", "execution_count": 221, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave (dave, google, com)\n", "Rob (rob, gmail, com)\n", "Steve (steve, gmail, com)\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 221, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matches" ] }, { "cell_type": "code", "execution_count": 222, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave google\n", "Rob gmail\n", "Steve gmail\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 222, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matches.str.get(1)" ] }, { "cell_type": "code", "execution_count": 223, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave dave\n", "Rob rob\n", "Steve steve\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 223, "metadata": {}, "output_type": "execute_result" } ], "source": [ "matches.str[0]" ] }, { "cell_type": "code", "execution_count": 224, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Dave dave@\n", "Rob rob@g\n", "Steve steve\n", "Wes NaN\n", "dtype: object" ] }, "execution_count": 224, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.str[: 5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Example: USDA food database" ] }, { "cell_type": "code", 
"execution_count": 225, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import json" ] }, { "cell_type": "code", "execution_count": 226, "metadata": { "collapsed": false }, "outputs": [], "source": [ "db = json.load(open('ch07/foods-2011-10-03.json'))" ] }, { "cell_type": "code", "execution_count": 227, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "6636" ] }, "execution_count": 227, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(db)" ] }, { "cell_type": "code", "execution_count": 228, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dict_keys(['group', 'nutrients', 'description', 'portions', 'tags', 'id', 'manufacturer'])" ] }, "execution_count": 228, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db[0].keys()" ] }, { "cell_type": "code", "execution_count": 229, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'description': 'Protein',\n", " 'group': 'Composition',\n", " 'units': 'g',\n", " 'value': 25.18}" ] }, "execution_count": 229, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db[0]['nutrients'][0]" ] }, { "cell_type": "code", "execution_count": 230, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
descriptiongroupunitsvalue
0ProteinCompositiong25.18
1Total lipid (fat)Compositiong29.20
2Carbohydrate, by differenceCompositiong3.06
3AshOtherg3.28
4EnergyEnergykcal376.00
5WaterCompositiong39.28
6EnergyEnergykJ1573.00
\n", "
" ], "text/plain": [ " description group units value\n", "0 Protein Composition g 25.18\n", "1 Total lipid (fat) Composition g 29.20\n", "2 Carbohydrate, by difference Composition g 3.06\n", "3 Ash Other g 3.28\n", "4 Energy Energy kcal 376.00\n", "5 Water Composition g 39.28\n", "6 Energy Energy kJ 1573.00" ] }, "execution_count": 230, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nutrients = DataFrame(db[0]['nutrients'])\n", "nutrients[:7]" ] }, { "cell_type": "code", "execution_count": 231, "metadata": { "collapsed": false }, "outputs": [], "source": [ "info_keys = ['description', 'group', 'id', 'manufacturer']\n", "info = DataFrame(db, columns=info_keys)" ] }, { "cell_type": "code", "execution_count": 232, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
descriptiongroupidmanufacturer
0Cheese, carawayDairy and Egg Products1008
1Cheese, cheddarDairy and Egg Products1009
2Cheese, edamDairy and Egg Products1018
3Cheese, fetaDairy and Egg Products1019
4Cheese, mozzarella, part skim milkDairy and Egg Products1028
\n", "
" ], "text/plain": [ " description group id \\\n", "0 Cheese, caraway Dairy and Egg Products 1008 \n", "1 Cheese, cheddar Dairy and Egg Products 1009 \n", "2 Cheese, edam Dairy and Egg Products 1018 \n", "3 Cheese, feta Dairy and Egg Products 1019 \n", "4 Cheese, mozzarella, part skim milk Dairy and Egg Products 1028 \n", "\n", " manufacturer \n", "0 \n", "1 \n", "2 \n", "3 \n", "4 " ] }, "execution_count": 232, "metadata": {}, "output_type": "execute_result" } ], "source": [ "info[:5]" ] }, { "cell_type": "code", "execution_count": 233, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Vegetables and Vegetable Products 812\n", "Beef Products 618\n", "Baked Products 496\n", "Breakfast Cereals 403\n", "Legumes and Legume Products 365\n", "Fast Foods 365\n", "Lamb, Veal, and Game Products 345\n", "Sweets 341\n", "Fruits and Fruit Juices 328\n", "Pork Products 328\n", "Name: group, dtype: int64" ] }, "execution_count": 233, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.value_counts(info.group)[:10]" ] }, { "cell_type": "code", "execution_count": 234, "metadata": { "collapsed": false }, "outputs": [], "source": [ "nutrients = []\n", "for rec in db:\n", " fnuts = DataFrame(rec['nutrients'])\n", " fnuts['id'] = rec['id']\n", " nutrients.append(fnuts)" ] }, { "cell_type": "code", "execution_count": 235, "metadata": { "collapsed": false }, "outputs": [], "source": [ "nutrients = pd.concat(nutrients, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 236, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valueid
count389355.000000389355.000000
mean66.07442914951.226806
std644.1765718664.025821
min0.0000001008.000000
25%0.04000010047.000000
50%0.78400013898.000000
75%9.00000019019.000000
max100000.00000093600.000000
\n", "
" ], "text/plain": [ " value id\n", "count 389355.000000 389355.000000\n", "mean 66.074429 14951.226806\n", "std 644.176571 8664.025821\n", "min 0.000000 1008.000000\n", "25% 0.040000 10047.000000\n", "50% 0.784000 13898.000000\n", "75% 9.000000 19019.000000\n", "max 100000.000000 93600.000000" ] }, "execution_count": 236, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nutrients.describe()" ] }, { "cell_type": "code", "execution_count": 237, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "14179" ] }, "execution_count": 237, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nutrients.duplicated().sum()" ] }, { "cell_type": "code", "execution_count": 238, "metadata": { "collapsed": false }, "outputs": [], "source": [ "nutrients = nutrients.drop_duplicates()" ] }, { "cell_type": "code", "execution_count": 239, "metadata": { "collapsed": false }, "outputs": [], "source": [ "col_mapping = {'description': 'food',\n", " 'group': 'fgroup'}\n", "info = info.rename(columns = col_mapping, copy = False)" ] }, { "cell_type": "code", "execution_count": 240, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valueid
count375176.000000375176.000000
mean65.17710815470.919291
std634.7151448395.334307
min0.0000001008.000000
25%0.03800010852.000000
50%0.78200014136.000000
75%9.00000019095.000000
max100000.00000093600.000000
\n", "
" ], "text/plain": [ " value id\n", "count 375176.000000 375176.000000\n", "mean 65.177108 15470.919291\n", "std 634.715144 8395.334307\n", "min 0.000000 1008.000000\n", "25% 0.038000 10852.000000\n", "50% 0.782000 14136.000000\n", "75% 9.000000 19095.000000\n", "max 100000.000000 93600.000000" ] }, "execution_count": 240, "metadata": {}, "output_type": "execute_result" } ], "source": [ "col_mapping = {'description': 'nutrients',\n", " 'group': 'nutgroup'}\n", "nutrients = nutrients.rename(columns = col_mapping, copy = False)\n", "nutrients.describe()" ] }, { "cell_type": "code", "execution_count": 241, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ndata = pd.merge(nutrients, info, on = 'id', how = 'outer')" ] }, { "cell_type": "code", "execution_count": 242, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valueid
count375176.000000375176.000000
mean65.17710815470.919291
std634.7151448395.334307
min0.0000001008.000000
25%0.03800010852.000000
50%0.78200014136.000000
75%9.00000019095.000000
max100000.00000093600.000000
\n", "
" ], "text/plain": [ " value id\n", "count 375176.000000 375176.000000\n", "mean 65.177108 15470.919291\n", "std 634.715144 8395.334307\n", "min 0.000000 1008.000000\n", "25% 0.038000 10852.000000\n", "50% 0.782000 14136.000000\n", "75% 9.000000 19095.000000\n", "max 100000.000000 93600.000000" ] }, "execution_count": 242, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ndata.describe()" ] }, { "cell_type": "code", "execution_count": 243, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "nutrients Alcohol, ethyl\n", "nutgroup Other\n", "units g\n", "value 0\n", "id 1159\n", "food Cheese, goat, soft type\n", "fgroup Dairy and Egg Products\n", "manufacturer \n", "Name: 3000, dtype: object" ] }, "execution_count": 243, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ndata.ix[3000]" ] }, { "cell_type": "code", "execution_count": 244, "metadata": { "collapsed": false }, "outputs": [], "source": [ "result = ndata.groupby(['nutrients', 'fgroup'])['value'].quantile(0.5)" ] }, { "cell_type": "code", "execution_count": 246, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 248, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 248, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAiUAAAD8CAYAAACow9QlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzsnXeYXVXV/z/f0BMgICAgYpAiWGihiIBkNBQLogiISBNR\nqQIiKK+ICYqCwqsvXSmGjghIbwmQGAiEhHRCUSSAP0pQaSFAIMn6/bHWyZy5c+7MnclMkknW53nO\nM/vuc/be65xJ5uy79lrfLTMjSZIkSZJkYdNrYRuQJEmSJEkCOSlJkiRJkmQRISclSZIkSZIsEuSk\nJEmSJEmSRYKclCRJkiRJskiQk5IkSZIkSRYJclKStEDSHEnjJU2U9Kik7eajr70lPS7pvpr6fpLe\njnEek3TBfNo8SNLxnWx7rKTl52f8JEmSpGtYemEbkCxyzDSz/gCSdgXOAJo62dehwHfN7KGKc0+b\nWX9JSwH3S/qamd1cnJS0lJnN6eS4HeE44Erg3UYbSEpxnyRJkk5gZmrrfHpKklrK/2D6Aq/OOyGd\nIGlMeFEGler3l/RIeD4ulNRL0inAjsClkn5Tb7CYeDwEbChpgKSRkm4Bpkbfx0uaImmypGNLY54s\n6SlJI4GNS/XDJRWTqtUkTYtyL0lnRl8TJR0l6QfAh4Dhku6La4bEWJPK41XYvdgegwYNWug25L3l\n/eX9LX5HQyxsI/NYtA5gNjAeeAJ4Ddgy6ncB/hhlAbfFpGMT4FZgqTh3PnBAlIcX7WvG6AdMiXJv\nYAywGzAAmAF8JM71ByYBywN9gMeAzUv1ywErAf8Aji+N2T/KqwHPRPkI4C+A4vMq8fMZYNXSeENL\ndq5c5xlZHnnkkUdPONZcs58tKgBm7byD0lOS1PK2mfU3s48DX8SXNgB2BXaRNB6ftGwMbAQMBLYC\nxkqaAHweWL/UXz1X3QbR1wPAbWZ2T9SPMbPno7wjcJOZvWtmM4EbgZ2Az0b9LDObgU+K2mMgPqky\nADN7vWRfYeMzwEclnS1pN3yCVIeF/remG49Bi4ANeW95f3l/XXFMn/4cPYmMKUnqYmajJa0uaXX8\nxX26mV1cvkbS0cBlZnZyB7t/2iJ2pYaZbbQRzf/b6k12ZtO8LNmhAFYze13S5rjX5jDgG3hcTAWD\nS+UmOh92syjStLAN6EaaFrYB3UzTwjagm2la2AZ0M00L24AuZcSIEYwYMaJjjdpzpeSxZB3AjFJ5\nE+AVfAKwC/Aw0CfOfQhYA/g48BSwRtSvSvPyy3BiKaVmjH7E8k1N/QDg1tLnLYGJNC/fTMGXb4r6\nYvnm7zQv31wMHB7l42hevjkMX74plpmKJZtJwHpRXg1YKcqfBMbXeUYGlkceeeTRAw5sUSFsoa0j\nPSVJLcvHskrhiTgo/jENk7QJ8LAk8KWNA8zsCUk/A4ZK6gW8BxwFPA9YG+O0dc4vMJsg6TJgbFx/\nkZlNApB0HTAZmI7HpBScBfxF0veAO0r1lwAfAyZLeg+fvFwQP++W9ALwQ2BI3IcBJ9W3rs0A8iRJ\nkkWCNdfst7BN6BBF0F+SJA0iyfL/TZIkSceQhC0OKcGS2gg47HSf0yR9oMFrV5D0H0kr1tTfJGmf\nTozdT9KUjrbrxDiVz03SByVdLelpSWMljZL01e62pzT+AEmvRwrxVEk/n8/+hkj6eifb/s/8jJ0k\nSZJ0HT1iUkIDrv7u7NPM3gHuBvYs6iStDOyAp8Z26/jzQb0xbgZGmNmGZrYN8E3gwwvAnjIjzQNd\ntwEOkLRF+WSIqi0IfrqAxkmSJEnaoadMSlohaXdJoyWNkzRU0hpRP0jSZSHCNU3S10M0a7KkO0sv\nOwE/ifrRktavPxoAfwb2K33eE7jHzN6V1FvSpSV7vhK29As7HlUnJNvDEzM2BL++W6qfIem0EAF7\nqHTv68XnSZJ+WafPzwOzrJRFY2b/MrPz27I5vBsjJN0cHpYzJ
B0gF1ObJOmjcd3qkm6Qi6k9Imn7\ntu7RzN4GxuHiaQdLukUuS39v9FcInk2S9I3SfZwn6QlJQ4EPlurnecAkbSVpeJT7SPpT/L4nStpT\n0unACuGxuTJ+j7dLmhDXddgLliRJkswH7UXCLgoH8GZFXd9S+VDgzCgPAkbiE67NgLeBXePcX4E9\nojwNOCnKB+JaGW3ZsAzwEs1ZG3cBX4ryr4BvFXbh2Sgr4Fkjy0b9hsDYKPcDJjdw34XA1/J45kkx\n9tzS2L8BfhrlW4D9o3xknef2A+B/2xizns0DcHXXDwLLAi8Ag+PcMcDvonw1sH2U1wUerxhjQPG8\nCYEzPIvnYDxAtm+c+zo+8SPGfQ5Yk+YJIcDauMjb1+PzM8AHorwVcH+UzyhsLP/7KT+jGO+Ppc8r\n1XlGlkceeSyYY1ES/0rmD1i8s2/WlfQX/KW0DD7JKLjLzOZG3IbMbGjUTwHWK1335/h5LfD7tgYz\ns/cl3QrsLemvwBZAIfi1K/AVSSfG52WBj+CTmPNiaWIOLjbWEY6T9LUofzjaj8E9HXdG/Thg5yjv\ngL9YwUXPzmhvAEnn4SJls8zs02F7PZvHmtkr0e5pmu9/Cs0J9jsDH5dUBDOtKKm3uUekzGcljcMn\nWKebZ/FsCwwzszfimh3x3w1m9oqkEcC2uIBaUf+SpPvLt1TnVncG9i0+lMYoMwU4Kzwod5jZg3X6\nwv9eJknS3UyfnpluSxI9eVJyLnCWmd0haQDuISmYBT4lk/R+qX4uLe/Zas61x5+BU3AvzC3WcsO4\nvczsH+WL5fvDvGxmm8Wy0TsNjFG0HYCro37azGbFMkQhBla+pzk031Px7QLqv5ynAnsVH8zsaEmr\n4Wm34Gmx9WyeVSrPLX0uP1eFzWUbqxhpZntU1DcintYWHRVPm/eczOwf8n1zvgScJuleMzututng\nUrmJxU30KEmSZH7pjHhaT4kpqXrBrgy8GOWDO9i2oPjm/E1cGAxJX5P06zrXj8A9B0cS39SDe/Al\nDKKPImizL+4tATgIKAdvKq79kKR7K8bqC7wWE5JNgO1q21Ywiua4l/2rLjCz+4HlJB1Wqu5TM249\nmxthKFDeOG/zDrYv8wCwr3yjvDVwefkx+PJcUb828LlSm2n4sg2UJl/AMFw/pbBrlSi+V8QZRV/v\nmNk1wJn4Xjh1GFw6mjpzb0mSJIs1TU1NDB48eN7RCD1lUrKCpOcl/St+Hoe/DW6QNBb4dxtt632z\nNmBVSZPwOIsfRv0GQJVrn1gTuwGPWfhb6dRpwDIRHDkF+EXUXwB8W74nzMdo6QUo7Fqblp6Pgruj\nz6nAr4lJUzv3dBxwVNzT2nWuAfga0CTpn5JGA0OAHzdgc5l6NhwLbB2BqY/hSqqdwsxuwgXSJuGB\nryea2StR/zTu9bkM32W44BfAOZLG4F6TgtOAD0TQ7ASaZxIXA1MkXQlsCoyJ8z+PNnVQHnnksQCO\nnib+lcwfKZ5Wg6QrgB+a2X8X0HhHAc+Z2e0LYrxk/lGKpyVJknQYLS7iaQsSMzuoqyYkkuZKOrP0\n+UeqEQozs/PLE5JIyS2nHncZqiPaJufs8CJMjlTe+f560sZ4/SS9Ham4j0m6YD7HGSTp+E62PVZS\nhzbuS5IkSbqHnJR0L7OAr6tB5djgo8C3uskeqF522RdY28w2NbPN8JTb17txPGjeJXhz4JOlLCOA\nBSmedhzQewGNlSRJkrRBTkq6l9nARUCrb/GqkUZXsyT86cCO4UU4tqZNH0n3ykXNJknaI+r7SXpc\n0kXhebhb0nJxbqsQC5tAKdCzhrVpDm7FzF4sUmYl7SIXZHtU0nWSekd9f7mY2lhJd0laswPjzSMy\nmB7CxdMGyIXbbsHjRZB0fMmDUw6gPVnSU5JGAhuX6odHBg2SVpM0Lcq91CzENlHSUZJ+gO92PFzS\nfXHNkBhrUu3zT5IkSbqZ9
oRM8pg/0TdgRTwjZCXgR8DP49wQQvCruNaahcVurdNfL2BFaxYd+0eU\n++G7824an6+jWcxtErBDlH9LhWgbsE7YOB7fZXeL0hh/A1aIzz8Gfoan/44CVov6bwCXdmC8fsCU\nKPfGM2p2i3ufAXwkzvWP/pbHM4Qewz0rRf1y8Vz/ARwfbYYD/Uv2PxPlI4C/0BxHVQjTPUOzKF1/\nYGjJzpXr/B4WuqBUHnl0x5FCZUl3Aou3eFqPwMzeknQ5npXSsE5JHXoBp0vaCdcG+ZCkQmJ9mpkV\n8RvjgPUk9cWVS0dF/ZXAFypsfEHSx3BdlIHAvXKJ9d7AJ4BRkoSL1D2MeyY+BQyL+l7Ai42OF2wg\naTz+x/BmM7tHrs0yxsyej2t2BG4ys3cBJN2IC6f1ivpZwCy5qF17DAQujP8YmFmxPFWE+YNPUD4q\n6WzgTjy9uQ7WwJBJ0rNIobJkYZOTkgXD2bgXYkipbp7IV7zYl22gn/2B1YEtzRVrp9EsEFYWNptT\nqm/or4y52Nk9wD2SpuNpw8Nwz0ELzRNJnwIeM7Mdaur7NjJWUMSU1NKIeJpR/746Kp42DzN7Xa6r\nshueyvwNfAuDCgaXyk2kVkmSJElLFmfxtJ6KAMzsNXzpoPyCexbYOspfxb0Q4MsXK9Xpry/wSkxI\nPocvg7QYq4x5XMhrat4Ur1JQTdKWcuEwJBV7Bj0HjAZ2kLRBnOstaSN8b5811LxZ39KSPhHjvd7e\nePXsreAB4GuSlpfUBw/AfSCOr0paTtJKwFdKbZ6l+bmWN9QbBhymZqG0VaP+TVyID7my7VLmOiin\nAFvWN20wKZ6WJElSn8VZPK2nUvbx/y8e41DUXQwMiIDQ7Wj2EEwG5sp3qq0NtLwa2EYujnYA8ESd\nscp8B7gglkrq8UHgNkmTgYm4mNt5ZvYf4NvAtTHmQ8DG4VXZG/iNpInABOAzHRivLXubLzCbgIuj\njcWXjS4ys0lRfx3+rO7AY1IKzgKOkO+rU856ugT4FzA5nnmRdn0xcLd8Z+J1gBFx/krgpPrWLXxR\nqTzy6OojhcqShU2KpyVJB1GKpyVJknQYpXjakoGkOZFCPDFSd7drv1XDfU+r0lmRdEykIV/Zwf76\nSjqijfNrSrpW0j8i3fh2SRt2xvYO2NQiPTtJkiRZOOSkZPFgppn1N7MtgJ8CZ9ReoM6LkdVzCRwB\n7GxmB3awv1XxDQ3rcRNwv5ltZGbbAP8DrNlo5xE0nCRJkvRAclKyeFB+EfcFXgWoI0a2v1xGfryk\nC4uXuKQLJI0JcbFBtX1LWkHSnZIOlXQhsD5wl1ymfRu5wNo4SQ9GMCySPlEaa2IEzJ4OrB91v2lx\nEx68+56ZXVzUmdmUIsVY0glh48TCRrlw3JOSLpdL2n9Y9QXfTgl7Jkv6Q+WDlM6QC9BNlPTbzv06\nkiRJkk7RnpBJHov+gafBjscDX1/DU4ahtRjZJsCteIYJwPnAAVEuxMR64QJkn4rPz+BZPsOA/Utj\nlkXHVgR6RXkgcEOUzwH2i/LSuNhZPyoE1eKaHwD/W+fcLsAfoyzgNlzHpF/c/zZxrkrw7ZTyPUb5\nCuDLUR4CfB0PjH2ydE2Kp+WxSB4pcpb0RACzdt5nqVOyePC2heZHxJNciYubQUsxsoG4aunY8JAs\nD0yPc9+U9D188rAWLpr2GD4BuBn4rZldWxqzCNkHWAW4IjwkRrP+zcPAyZLWBf5qZk/Px+rKrsAu\nkdUjXOF1Izyj5jkzGxvXbUe14BvAQEkn4qJwq8b93VEa4w3gHUmXRH0bOzdbZ+8jSeabFDlLFldy\nUrKYYWajJa0uafWoKouRCbjczE4ut5G0Hi6Bv5WZvSlpCC2Fx0bhyqzlSUmZX+JxIF+X7y48PGy5\nVtJoYHfgTknfx+Xs6zEVTzWuQsDpVlraCdv7VdxjleDbcrhnqL+ZvRjLPy3E1cxsjqRt8cn
bPsDR\nUa5gcKncRGqVJEmStKQz4mk5KVk8mPe1SdIm+BLMfyuuuw+4WdL/mdm/Q0BsJVw87C1ghnxjvS8S\nE4vg58AgSeebWdUmeysDL0T5kJItHzWzacC5kj6Ci7JNpo44nJndL+lXkr5rZpdEH5vicTL3AL+Q\ndI2ZzZT0IVxPpcX944Jv50nawMz+GfEk6wCv4O6N/0paEZ/8XF8eP67tY2Z3S3oYeLrKTmdw/VNJ\nkiQJTU1NNDU1zft86qmnttsmJyWLB8uXljUADjIzq10qMbMnJP0MGCpXbn0POMrMxoQI2hP4csiD\n5WbR9lhJf5J0hpmdRMv1izOBy6Pv8nLINyQdiE8eXgJ+ZS7lPkou1HaXmf2k5l72BM6WdBK+V9Cz\nwHExwdgEeDjuawYuIDe3bIuZ/UfSt3HBt+Xi3M/M7B+xLDM1bCkLrhXtVwZukVR4UH5IXdJ9niw8\nUuQsWVxJ8bQk6SBK8bQkSZIOoxRPW7KQdHKks06KlNtt2rn+MEkHLCj7OkI9QbOqekkzOtF/h9sk\nSZIk3Usu3ywmRNbNl4AtzGy2XIW1zZ2HzeyPC8S47qdDbovIyklXR5IkySJGekoWH9YG/mNmswHM\n7FUzexnmScX/JkTDRktaP+oHSTo+yhtIGqZmqfqPRn2VYFlvufz7hOhzn1pjJH032k2QdH0RpxGe\njrMjruTpstdD0nmSnpA0FN8ksMPUsbdWYG1dr9bvwrM0TL5DcCGfPzXaX9MZG5IkSZLOkZ6SxYeh\nwM8lPYln2VxnZiNL518zs80i8PRs4Cs17a8Gfm1mt0paFuglaRdgIzPbNrwLt0raEZ8wvGBmuwNI\nqsqmubGUQfNL4FA8JRdgLTPbQdLHcTG3v8bkZCMz+7iktYHHgUvr3OtZEVQLLTOP6tn7L2BD4MBC\nz0RSH1zD5XhJpwCDgGOAnwDrmdn7klauMz61QcRJ0pWsuWY/Xn752YVtRpIscNJTsphgZjNxYbTv\nA/8G/izpoNIlf46f1+ICY/OIFNkPmdmt0dd7ZvYuLQXLxgMb44JlU6L+dEk7mllVfMamcon7ycC3\ngE+Wzt0c4zxBs0fks2EbZvYScH8bt3uC+V4//c1sy1J9PXuhpcAawBzgL1G+CleHBZgEXCNp/7im\nDgtd1DOPxfiYPv05kmRJJD0lixGREjISGBnLFAfhcurgf+2oKBdUffWvFCwDkNQfj2E5TdK9ZnZa\nzSWXAXuY2WOSDsYl7wtmtTNuZ2lUYK2K4pl8GdgJ2ANXo/2Umc1tffngUrmJFE9LkiRpSYqnLcFI\n+hgw18wKwa8tgPLXrX2B3wLfpFl2HQAze0vSvyR91cxuieWbpagvWLY08KqZXSPpDXxpppYVgZcl\nLQPsD/y/eqbHz5HA9yVdge8K/Dl8Samh24+fjQqsEfe3N+4t2Z9mbZaPmNnfJD2EP7MVgTdbDzm4\nQdOSJEmWTFI8bclmRVw5tS++Qd3T+FJOwaqSJgHvAvtVtD8I+KOkX+CiavuY2TBVC5ZtBJwpaW5c\ne0RFf6fgAmWvAI/QrOJa66UxADO7SdLncXGz54GH6txnlZen6KOevXMr2r0FbBvxJNOBfSUtDVwV\nsSQCzjazigkJpHha0p2kOFqypJLiaUsAkqbh+9q8urBtWRxQiqclSZJ0GKV4WiJpDvAh4EFJ16lZ\nQr2j/cwTLZN0bGf7KfU3PNJ0J0p6QL7DcGf76hcxNJ1pO0DSZzo7dpIkSdJ15KRk8WemmS1nZp/A\n4ysO74I+jwN6V52Q76nTKPuZ2RZ4MO5Z89lXZ10XTcD2nWybJEmSdCE5KVmyeADX60DS8ZKmhPjZ\nsVHXwuMg6UeSfl7uQNIPcM/LcEn3Rd0MSWdJmgD8VNJNpet3lvTXOvaUg1w3iOunSTpD0qPA3pI2\nl/RweFRujJgZJG0VdROAeTsXSzpY0rmlz7dJ2inKX5A
0Ti7oNiyycg4HjpPL8u8gae94LhMkjaj3\nICXlkUe7x1prrVf3P2OSJK3JQNfFHwHIgzi/CNwlT+c9GNgGz0J5RP4Cfp12PA5mdq5cBbbJzF6L\n6j7Aw2Z2Qoz1uKTVzOy/wCHUF0Er2APXPin4j5ltHX1NwncyflDSqbjI2fHAn4AjzWyUpN/Wmtnq\nIUirAxcBO5rZ85JWiR2L/wDMMLPfxXWTgV3N7CW1IZ7WecdMsiQxfXoGRCdJR0hPyeLPCnIxsTHA\ns/gEYUfgJjN7N0TX/oqLl3WE8l/b2dFHwZXAAXKvxnbAXXX6uDps+wxwQqn+OoCYFPQ1syJd93Jg\np+i3r5mNKo3XHtsBfzOz5wHM7PU61z0IXC7pu+SkPUmSZIGSf3QXf942s/7lCqnut7fZuOekoNFg\n1ndr0lEuA27DRdKurxYfA+BbZjahor49oTOon5M7m5aT7fI9tPu11cyOlO+uvDswTlL/kkeoxOBS\nuYkUT0uSJGlJiqclVVS9iB8Ahkg6A5+E7IkLiE0H1pC0KvA2/mKu8nK8CawMFCnGLcaIpY8XgZOB\nnTtoW7mfNyW9JmmH8IociHs73oj67c3sIVyLpOBZ4Aj5zOvDwLZRPxo4X1I/M3tO0qox2ZgR9+IG\nSeuHHP1YSV/AN+9rZ1KSJEmS1JLiaUkVrYIfzGyCpMuAsXH+IjObDCAXTxuLK7A+Uaefi4G7Jb1g\nZgOrxsDVWFc3s6catatO/cG4qNsKwDN4jArAd4A/yQXchpbubZSkZ3ERtieAcVH/H0nfB26KCcsr\nwG64R+cGSXsAPwCOV3N68r3Fc2lNxgok7ZMiaEnSMVI8LekW5Bkw481syMK2patRiqclSZJ0GCnF\n05JOIGmOPEV2ijohuCYXbNsU3323W8ao6atFGnAn2q7V2bGTJEmSriMnJUkVM82sv5ltSgcF12Jp\nZKaZNZnZ+21c2u4YaiMit4LOui6+DazTybZJkiRJF5KTkqQ9GhFce1LS5XLhtXWLhpJWl/SQpC82\nMkZFXx+WtF+MNzkCc4u+D5H0lKTRwA6l+nly+PF5Rqn8k+hngqRfS9oL2BrfhG+8pOXkwm1T5cJs\ntfonlPrKYwk+UhQtSbqHDHRNquio4NqGwIGRtVL84f4gcCvwUzO7v5Exon6joi9JawNnAFvGOMPk\nAalj8PSXLfFMoBHA+Dr3YjHOF4GvANuY2Sw1i6cdDRwfwb8fAL5mZptEmxRPSypJUbQk6R7SU5JU\n0VHBteeKCUmwLHAvcGKdCUntGM/RrPr6bKmvbYDhZvZqaJ1cDewEfLpUP5sQW2uHgcAQM5sFrcTT\nijfMG8A7ki6RtCfwTgP9JkmSJF1EekqSKjoiuAatxc5m46m4X8CXZjoyRm1fVQNbnfpi7F7Rn/AJ\nUkOY2RxJ2+ITmH2Ao6NcweBSuYkUT0uSJGlJZ8TTMiU4aYWkGWa2Uk3dlsAQXK59KVyM7AB8WeX2\nCFid1x4XJLsBeMTMWsVm1BmjX7kveVbMw8BWuBfjbuAcXEflYaA/8BZwHzDRzI6RdDKwkpmdJOlr\nwI1mtpSk3YBTgF3M7B2FeJqkW4Dfm9kISX2A3mb2b7mU/dNmtkaF7ZbLN0s6Iv92JknHkNpPCU5P\nSVJFo4Jrk2IiUXu9mZlJ2g+4RdKbZvaH9saorTezlyWdhMeMgE9YbgOQNBifGL0GTCy1vzjGnADc\nQ3hezOweSZsDj0qaBdwJ/AzfT+cPkt7GY1tuVXN68g/r2EiKpy3ZpChaknQP6SlJkg6iFE9LkiTp\nMI14Snp0oKukkyU9JmmSPKVzm4Vt08JCpdTXRuq7aMxOi5Z1crwBkuZK+k6pbvOoO34++vxM11mZ\nJEmSdJYeu3wjaTvgS8AWZjY70jkbDmpcDGl0L5kFNW538RjwDeBP8Xk/Wi7fdJQmPC7l4fkzK0mS\nJJlferKnZG3gP5E
SSqSHvgwg6RRJj8iFsubFMkgaHnobSFpN0rQofyKuHy8Xzdog6m+SNFYuGPbd\nUj+HKoS7JF0k6ZyoX13SDdHXI8U38Pg2PiH6HxcBlS1oY6wZkk4Lux6StEbUrxefJ0n6ZUcenOoI\njIWdwyVdL+kJSVeWrtlG0qiwY3TpHtaRdFc8j9+Urt8l7HtULiPfO+oHxnOYFKm3y0T9NEmD4/lM\nkvSxOuY/ByxfPAc8w2feTsaS1g97xkr6W9GPpN3D7nGShkpaI+JhDgeOC5t2kLR3/A4myHVY6j3D\nPBbDI0XRkmQhY2Y98gD6ABOAJ4HzgZ1K51Ypla8Avhzl4UD/KK8GPBPlc4D9orw0sFy5H2B5YAqw\nKj4Zmgb0xbNQRgLnxHVXA9tHeV3g8SjfCnwmyr2BXhX302qs+DwX+FKUf4OLkQHcAuwf5SOBN+s8\np1b1eBbN12uvAQbggaNr45GcDwHbA8sA/yw9uxXj3g8Gno7Py+GaJuvEs/0bsEJc/2M8qHQ54Hlg\ng6i/HDgmytOAI6N8BHBxhd0D8F19jwaOCtsuBX6OC6CB66MU/W8L3BflvqV+DgXOjPKgom18ngys\nHeWV6zxTA8tjsTywJEm6h/j/1epvavnosZ4ScwGv/sD3gX8Df5Z0UJweKP9WPBn4HPDJdrp7GDhZ\n0onAehYCW/g36Il4lseHcbXRbYERZvaGmc0Bri/1szNwnjzz41ZgRbmHYBTwe0k/wCcbcytsqBoL\nYJaZ3RnlccB6Ud4B+HOU53k0uoAxZvZS/AOaGONtDLxoZuMBzOytuHfwl/5b8cymAv3wtOFPAKPi\nWRwU9RvjE8F/RtvLcTG0gptK99mvjn0G/AXXEdkPuJZmddg++ETl+hj3j8Ca0W5dSffEv4kTqP9v\n4kHgcrm3qscubyZJkvREevQf3XhxjgRGyvdKOUjSdbjnpL+ZvShpEO59gJKwVqkOM7tWvofK7sCd\nkr6Pv/w+D3zaXJZ8eKlNvehhxfW1G9H9RtLtwJfxF/WuZvb3eY2kAW2MVe5rDs2/M4ujLXvq0ZbA\n2KxSuTxevTHK18+N6wUMNbP9yxdK2qwdW4u+yuO2wsxekfQ+Pgk8hua9b3oBr1mNKFtwLnCWmd0R\nz3tQnb6PlAdM7w6Mk9TfzF5rfeXgUrmJFE9LkiRpSWfE03rspEQeKzDXzJ6Oqi2IeAP8Zf1fSSsC\ne9PszXgW34DtUfybdtHXR81sGnCupI8Am8W1r8UkYRP82z+4Tsfv5eJaM4G9cJc/wFDgWOCs6Hdz\ncy2P9c0SEHQ6AAAgAElEQVRsKjA1XnibAPMmJfhSUNVYUP8lPgr3FFwN7F/nmnrti+dwA/BVfHmm\nLZ4C1pK0lZmNi+falgT7aNxjtIGZ/TO8RetEP/3ieTwDHEizBklHOQX4oJmZQm3WzGbIY1P2NrMb\nwCdCZjYZF3N7MdoeXOqnEHojrl/fXOZ+rKQv4Mtw7UxKkiRJklqamppoamqa9/nUU09tt02PXb7B\n4xgul6cETwQ+Dgw2szdwAa2peADkmFKbs4AjJI0DPlCq/0b0MwF361+Bq4cuI2kq8GsiO8PMXozP\nY3AJ9Wm42ij4hGRreaDmY8BhUX+cPHhyIvAepcDMoHKswKjmOOAoSZPwGJB6rCDpeUn/ip/HARcB\nA+J+t6O1tHuLscPzsy8+0ZiIT76Wa+P6/wDfBq4N+x4CNo4lnkOAG6J+Dr7E0tZ9VhtmNtrMbq04\ndQBwqDwg9zFgj6g/NcYdiy/3FdwG7KkIdAXOVOxKDIyKCU0FymMxPFIULUkWLime1gkk9TGzmZKW\nwuMgLjWzWxa2XcmCQSmeliRJ0mG0uIunLUQGh5dhCh64mROSbkDdKI4XyzwfaP/KJEmSZEHRY2NK\nFiZmduLCtmFxR90vjpeujiRJkkWM9JQkiyqV4niqI7ImF3d7KOoflLRR1PeSdGYR0
yPpqOhfcX4F\nSXfKBfF6S7pdLpw2WdI+VYZFuzxSqCxJki4mPSXJospQ4OeSngTuA64zs5Fx7hUz20rSEcCJwPeA\nJ4AdzWyupIHA6Xjm1WG45slmkamzSvRhwErAdcBlZna1XOX2BTPbHUDSSvXNS0fL/DJ9eptLy0mS\nLIHkpCRZJIlA4v7AZ3ENlz9L+h98NlAWWdszyqsAV4SHxGj+tz0QuLCITDWz16NewM3Ab83s2qib\nApwl6XTgDjN7sL6Fg0vlJlKnJEmSpCWd0SnJ7JukRyBpL1xf5FPA1mb2qqStcLn4z0saAowzs/Pk\ne9oMN7P1Jd2AT0ruq+lvGnAHsJKZHVyqXwWPZfk+cK+ZnVZhi6WnpCsQ+fcnSZYclNk3SU9F0sck\nbViq2gIXfatHX+CFKB9Sqh8GHCZP30bSqqVzPwdel3R+nFsbeMfMrgHOxLcxSJIkSRYQOSlJFlUq\nxfHauP63wBlyYbzyv+tLgH8Bk+Vp3PtFfbGccyy+6/AZwKbAmLju50ArL0kzC1/oq6cfKVSWJEkt\nuXyTJB1EKZ6WJEnSYXL5ZiEgaY5c6OuxSC09XlKbvwRJa0v6y4KysWL8fvINDavq3477mRA/D+ii\nMYvnNEXSdZKWb79V3b4OlnTufLRdq7NjJ0mSJF1HZt90PTOLXWolrQ5ci2/4NrheAzN7CfhGbb2k\npcxsTjfZ2cqMOvVP19l1d34pP6ergMOB/ytfoI65JDrruvg28BjwcifbJ0mSJF1Eekq6kdiY7vvA\n0TDP8zBS0qNxbFeqnxLlgyXdIuk+4F5Jl0n6atGnpKskfaU8jqQ+ku6NPidJ2qPU7+OSLgrPzd2S\nlotzW8nFxCYAR1GfSi+PXGzsKUmjo/9zon59SQ+HHb+UNKOBR/UAsGHY+6Sky+N5fFjSfooN8iLu\noxj/kGJ8YIdS/ZDQGyk+zyiVfxL9TJD068jo2Rq4Krw2y0k6Q9LUeDa/rftQFgHxsZ50pFBakiSN\nkJ6SbsbMpslVRdcApgM7m9l78sySa4FiP5fyN/0tgU3N7A1JOwE/BG6RtDLwGeCgmmHeAb5mZm9J\nWg0YDRQ76G4I7Gtm35d0HbAXcA3wJ+BIMxvV1ssX2EDSeHxyYsAPgGeAn+EZMW8Bw4GJcf3ZwO/N\n7C+SDqO+B6NQVF0a+CLNOydvBBxoZmPl2TBnxPN4HRgmn3CNwT1PWwJvAiOA8XXGsRjni8BXgG3M\nbJakVczsdUlHA8eb2QS5lP3XzGyTaLNy/ceSMSUdIYXSkiRphJyULBiKv8jLAudJ2gKYg7+Aqxhm\nZm8AmNlISefHZGNv4EYzm1tzfS/g9JjAzAU+JOmDcW6amRXxIuOA9ST1Bfqa2aiovxL4Qh1bWi3f\nyD03IwobJV1fupfPAIVnp0itrWKFmOyAe0ouBdYBnjWzsVG/Da438mqMczWwE/48y/XXUf9ZFgwE\nhpjZLGghogbNv583gHckXYJrmNxev7vBpXITKZ6WJEnSks6Ip+WkpJuRtD4w28z+LWkQ8LKZbSbX\nzXinTrOZNZ+vAA4EvonHQNSyP7A6sGXIrE8DisDRWaXr5pTq5/era7321sA1AG9XTHag9b1X9WFt\n9D2bWJaUd9jwJn5mNkfStvgEZh982W1g9dWDG+02SZJkiaSpqYmmpqZ5n0899dR222RMSdcz72UZ\nSzYXAkVmSF/gpSgfBCzVYJ+XA8cBZmZPVpzvi+8HM1fS5/C9XlrZUxAejtckbR9V+7cxdtXLfyyw\nk6S+sfyyV+ncaNyjAz6J6ki/tfVjYpwPxCRuP+BvpfpVJS2DTyAKnsXjRMA9NstEeRhwiKQVgLKI\n2pt4IDKS+gCrmNndwPHAZm3YnyRJknQx6SnpepaPZYllgfeBK8zs93HuAuBGSQcBd9PaK1CJmb0i\n6Qma93yp5WrgNkmTgEfxzenmNa/T5jvAnyTNx
Te/q8f6NTElfwop91/jk4NXgSfxpQ/w+JerJP0U\nuKdU3+q22quPXYFPwmNGAG43s9sAJA3GJ0Cv0RzPAnAxHn8zIcafGX3dI2lz4FFJs4A78biYy4E/\nSHobj225Vc3pyT+s/1gyRqIjpFBakiSNkOJpPQBJvYFJQH8zaySbpduR1Cc2zVsKnyxdama3SJqD\n2yrcg/N3M6sXr1Kv7/8xs9PrnHsWn+jMxScwR5rZ6Pm4FeT75txmZn9t8PoUT0uSJOkgSvG0no+k\ngcDjwDmLyoQkGBzeiCnAM2Z2S9S/i/+76gU8DxzZib5/2sa5uUCTmW1pZv3nd0KSJEmSLDrk8s0i\nTuxuu97CtqMWMzuxzqm5ZrZFuUK+a++VQO+oOtrMRsuVVK8DVsL/LR4B7E5zZs5UMzuwpn9RMZmW\ndCaeQTQX+JWZ/aWd+vPwINZ/4ctsRT9nhA2zgaFm9uMGHkeSJEnSBeTyTdKlSJoNTMYnD8+Y2V4R\nozG3rM9iZttIOh5YzsxOj0yZ3rEk9KaZVWqERGbRm/gk410z+4xcBO37ZrZbpEKPBbbFRdUOq6jf\nHjg86tfGPVGH4rErD5V1SszszQob8j9NDWuu2Y+XX352YZuRJMkiTCPLN+kpSbqaVqm+1NdnGQtc\nGhk0t5jZpAbHaDKz10qfd8CF6Iqg4BH45GPHOvU7lepfknR/9NMBnZKcl5RJcbQkSbqCnJQkC4If\nUqHPYmYPhODbl4HLJP2vmV1F+6ktjZyvmjXUqyfs6aROSRMpnpYkSdKSzoin5fJN0qVImmFmK9XU\n/Q74l5n9XtIhwCVmtpSkjwD/L/RVjgI2MLPjJf0X+GDVZoSxfLNVoeYadXviewx9GVgNT1X+NO5B\naa9+TWAq8F08hbh3CN31xdVs16iwwdJTUovIvyVJkrRFLt8kC4OqN1OtPstbUd8EnCjpfWAGzXv6\nXARMkTSuItC1Vf9mdpN8c8NJeKzJiWb2CtBW/efxycjzwEPR1Uq4xkkDOiVJkiRJV5OekiTpIBno\n2poMdE2SpD26TKdE0rKSNpO0qaSG9xLpDlTain5JRNKgyFppqH5RRNIASa9LGi9pqqSfz2d/QyR9\nvZNt/6cz7cwsj9KRE5IkSbqCdiclkr4M/BM4BzgPeFq+DfzCIr+lLh6MjCydbYADIjNnHhEQuyBo\nS6gtSZIkWYA04in5X+BzZtZkZgOAzwG/b6fNAkXS6pJukPRIHNuX6odKmiLpYknPxuZu/SRNKbX/\nUfFtXdJwSb+TNFbS45K2kfRXSU9J+mWpzf4x1nhJF8rpFd/aJ0uaJOnYClt3lzRa0riwbY2oHyTp\n0hj/aUk/KLU5OcYfCWzcwWfTys6oPzT6HC3pIknnRH0Lr0PhmQrvxghJN4d9Z0g6QNKYuNePtvW7\nqIeZvQ2MAzaUdLCkWyTdB9wb/Z0Zv79Jkr5Rsus8SU9IGgp8sFQ/TdIHoryVpOFR7iPpT/G7mShp\nT0mnE0Jtkq6U1FvS7ZImxHXljf6SJEmSbqaRQNcZZvZ06fMzeFDiosTZwO/M7CFJ6+JZFJ8ABgH3\nmdlvJO2Gb0JX0JbHZVaIex0D3AJsAbwO/FOeSbImsC+wfaSRno/vtPs4sI6ZbQYuvlXR9wNmtl2c\nPxT4MVCoo26MB3/2BZ6SdEGM/Q18x9plgfH4pnvtImmTKjvjpf+z6PstYDgtN7UrU35OmwGbxLOY\nBlxsZtvGc/oBvrNuvd9FK/PCxtXwjJhf4BoiWwKbmtkbMTnazMw2VYifSfobLn62kZl9XM3iZ5dW\n2Fv+fArweul30zcCZI8qdFVivBfMbPf4vBJ1iLndEkfGjiRJ0p00Mil5VNKdwF/wP/D74C+HrwNY\ng5uYdTM7Ax9X85tiRfk29DsCX4N5u8S+Vq+DGm6Nn1OAKZGxgaR/AusCnwX6489BwPLAdFxs66OS\nzsZ3oa3af
XddSX8B1gaWwV/uBXeY2Wzgv5Km45OfHYGbzGwWMEvSra16rM/AOnbOAEaY2RtxX9fT\nLGjWFmNLz+JpfMIB/pyaolz1u+gdHpEyn5U0Ds+KOd3MnpBrhAwr7KLj4mdQX8NkZ3yCRrSr2r14\nCnBWeFDuMLMH6/TFkrqKmCJpSZJ0J41MSooX2YD4/G9gBeAr+F/mRWFSIuDTZvZ+i8rWWRLFX9TZ\nQDlmYfma62bFz7mlMvj9Lh39XG5mJ7cyRNoc2A04DPdwHFpzybnAWWZ2h6QBuDendlxw5dP5Tdmu\ntFPSV6n/8p5NLOvFxKIc2Fy2r/xs5pZsrfxdVDDSzPaoqJ/ZRps2xc+CefbT+vdar08AzOwfkvoD\nXwJOk3SvmZ1W3WxwqdxEiqclSZK0pDPiae2+9MzskM4a1E1UvUyHAscCZ4FPDMwly0fh345/K2lX\nYJW4fjqwhqRVgbfxDdju6oAN9wE3S/q/ENpaFde4mAm8F8sCf8c3oatlZeDFKB/cwH2OBIbEt/dl\n8cngH9pp056dY4HfywXCZgJ74fvVADwLbA3cAHwV9+Z0hHq/i87wAPB9SVfg4mefBU4Im4r6NfE4\np6ujzTRgK9yLs1epr2HAUfgSE5JWMbPXgfckLRXLW2sDr5rZNZLeoPWEssTgTt5SkiTJkkFTUxNN\nTU3zPp966qnttml3UiJpCNWCVd+puHxBsIKk52n+1vw74BjgAkmTcA/ISOBIPE7hGkkHAA8DL+Mx\nMrMl/QJ/Of8/4IlS/219EzeAWGr4GTBUUi/gPfyF9y4+gegV155U0cepwA2SXgXup/4OwMVYE2K5\nZzI+mRrThn0ny4Nr5U3tI5JOqbXTzMZI+nX09SrwJL7vC8DFuIDYBPzFXs9zUe85HQucX/G76DDW\ntihalfgZ+O/80phUjCjVnxZ2TcG9KacCN+P3OyWWkq4EzpQ0F39WR3TG7iRJkqRztCueJt+BtWB5\nYE/gRTM7pjsN6wrkmipz4lvwdsAF1nqzuCUSSX3Md+RdCrgJuNTMblnYdvUEKpYFlxgy0DVJks6i\nBsTTOqzoGt+4HzSzNlM9FwUkbYgH6PbC4x+ONLNx3TDOmsD/4cser+MejeNqspa6eswhwG1VgcZy\nEbXv4d/25+LLOD+x0l4yks7Egz+XA4aa2XE1fZwK/M3MykGkCxxV7KUT9XNwD8oyePbNwWb2bgf7\nPhi4x8xe7mA76+j/myRJkiWdRiYlnQmk3IiSLsSiTEwKFoRn5CZgiJntByBpUzzWoaFJibrwLSfp\ncHyysa2ZzZC0NB5HsQLNe85gZidK6mVmc6v6MbNBVfULgXrPZWYplfcq4HB8YtgRvg08hi/rJUmS\nJAuZRhRdZ0h6s/gJ3Ab8pPtN6xlI+hwe3HpxUWdmU8xsVJw/QS4wNlHSoKjrJ+lJSZdHjMOHJe0i\n6SFJj0q6TlLvuPYUuQjZZEn1AlzL/BQ43MxmhC2zzey3ZvZW9DdD0lkRM7Jd9D+mtn+VRNTkgmSD\n5YJvkyR9LOp3kguNjY9zfSqez01yIbopkr5bqp8h6bR4Lg+pWURuvfg8SSWxunZ4ANiw3niqELWL\nZcmtgavC/uU68ayTJEmSLqTdSYmZrWRmK5d+fszMblwQxvUQPoUrkrZC0i64yFchCra1pB3j9IbA\neWa2KZ4B9DNgoJltHf39KK4718w+HaJfveWy/5XIxb76mNnzbdjbB3jYzLY0s4ei/20b6P8VM9sK\nz/w5IepOwJfE+uOZMe9UtDvEzLbB5eSPlWcAFXY8ZGZb4JOK70X92cD5ZrY58FIb91GIry0NfBHX\nGKk33haEqF30OyT+DY8FvmVm/UMHpiPPeok61lprvTZ+FUmSJF1Doxvy7RHfrs+StHt3G7UYsSuw\ni6TxuBLrxjSLlD1nZmOjvB2uejoqPBgHAR+JcwPlUvCT8dTXTzY6uKRdw5M
xTR7oC555Uo5DabT/\nm+LnOJozhkbhqcU/AFatsxR0nKSJwGjgwzTf/ywzu7Oizx2AP0e5KqW6YIV4rmOA52hWdK0a7xlC\n1E6u7FsoEouWadQdeNa2RB3Tpz9X/1EkSZJ0EY2kBJ+Bf+ssdCCOlbS9meVGZs5UYO8654SrlV7c\nolLqR8tUW+HBpvvXXLcccD7Q38xelC//1BUEixiStyT1M7PnzGwong58G80iaO8W8Ssd7L8QSpsn\n6mYu33878GV8QrWrmf29ZP8A4PO4mNos+T40Rf9lcbWyUFzxJiyeSz3ers2kqjeemb2uZlG7w3FV\n4u/WtO3Qs07xtCRJkrbpFvE0XN1yi+JbsKTLgQnk7qoAmNn9kn4l6btmdgnMC3Tti+t8/ELSNZF+\n+yGaX8blF+5o4DxJG5jZP+XxJOsAr+Av6P9KWhGf/FzfjklnABdK2s98/5hCXr6gPO7ynei/uSNp\nfTObCkyVtA2+L87fS5f0BV6LCcImuEeoyo4yo4D98Enw/nWuqde+cjz5/jpVonYzcDE76PCzGNyG\naUmSJEm3iKcFq+AiW+B/+JOW7AmcLekkPK7iWTwl+J/xcnzY5wbMAA7A03TnZZWY2X8kfRu4Nr6x\nG/CzkD2/BPfGvERL4bTKrBQzu1AecPqIpHfxjJtR+ESyRbuYtFzcQP/1MmCOkwf6zok+alVx7wYO\nlzQVeAoXsGu3T1zw7sf4Zoj1qGpfb7x1qBa1uwz4g6S3gc8A9Z51BUvWHjBrrtlvYZuQJMkSQCPi\nafvh376H43+JdwJOMrPrut+8JFn0UOqUJEmSdBg1oFPSZqBruP4fxN3gfwVuBD6TE5IlD0lz5Kmz\nRQrwj6P+WEnLl66bUb+Xyn7XlsvoN3r9EEnPhA3jJR3dkfHq9HmwpHPnt58kSZJk/mhz+cbMTNKd\nkbZ66wKyKVk0mVkbWBocB1yF7/sD7e/i2wIzewnfTbkj/MjMbmr/sg6Rro8kSZKFTCMpweMjiDFZ\nsmnlcotU4A8B90u6r7m6UhRtSKTkjpL0tJqF2frJBeQKkbMz5cJnEyUdVceWVv9uJe0nFz2bHBlj\n7dUfIukpSaPxNOSifp8Yf4KkER19SEmSJEnnaSSm5Elc6Os5PI212IF2s+43L1lUkDQb36m42J35\ndDO7XtI0PI32tbhuLrC7md0p6TfAG2b2a/lePb3NbF9JHwduNbONIj36NjPbTNIRuD7IvuGlW8XM\nXq+xYwge1/Rm2HEgHoQ9Gheoex0Yhouwja1TPwZ4JOrfxHcTHm9mx4RGyW5m9pKklc3szYpnsUR4\nVXLzvSRJupJGYkoayb7ZrYvsSXo2rXRBSpT/kdWKou1cOnczgJk9Ialq/6SBwIVFFGnthKTEiVba\niFDSHsBwM3s1Pl+NT1yoU6+a+utoFnV7ELg84lxabXbYzOI/L5k+fcnKMEqSZOHTyKSkKnCxQ8GM\nyRJFPVE0aBZgg67Pqa3XX1W91bvezI6M5crdgXGS5nmBWjK4VG4ixdOSJEla0l3iaeOBdYHX8D/k\nqwAvS5oOfM/MKvd9SRY76r3038QFyF5t57pG+hsGHCZphJnNkbRq9YSgFWNwnZgPAG/g4mvn4Ms3\nbdWviuu47ANMhHmCcGOBsZK+QPO//RoGN3ibSZIkSybdJZ42DLjBzO4B308F2AsYAlwAfLoTtiY9\nj+Xle80UMSV3x1YDFwN3S3rBzAZSf12jtr7qukuAjwGTJb0XfV/QXjszezmE60ZE1e1mdhtAG/WD\n8XiT14gJSXCmpGIp514zm1x9O4v/0kYKpiVJsqBpJNB1SqQEl+smR2DixNjlNUmWGFI8LUmSpOPM\nt3ha8JKkn0TqZr8QzXpF0lK4XHqyCFASN5ss6caQmu9MPy3E0BYGbdkgabikJ0sibl/vgvEGSTp+\nfvtJkiRJ5o+6kxJJxaZlQ/Et4G+OY11
8bX4pOi56lXQfM82sf6RqzwAO62Q/xwG9u8KgmLh2hw37\nmdmWcb9tZMgkSZIkPYm2PCVbyXe1PRAYhKd2DozyHDN7z8yeXgA2Jh3nYWCD4oOkEySNCUGyQVHX\nW9Lt4XGYHKJhhRja8EIMTdIF0XZK0Tbqp0UAKZK2kjQ8yoMkXSHpQeCK8K6NlPRoHMXOvQPC63G9\npCeKSXCVDRVUiacdHzZOlnRsA/Unh3jaSGDjUv0xkqbGs7qmY489SZIkmR/aCnT9A3AfsD7waKm+\nCHRcvxvtSjqOYJ53Yhfg/vi8C7CRmW0rScCtknYEPgi8YGa7x3UrmdkMST8EmkpZLz81s9flO+ze\nJ+lGM3uMtgNXPw7sYGbvxTLMzlHeELgWKBSCtwA+AbwMjJK0vZmdW2FDLVfJd0A2fKL8UeDg6Hcp\nfIfkEVGuV/8NYDNgWTzDrPg3/hNgPTN7X9LKdR+2ek6ga4qgJUnSU6g7KTGzc4BzJF1oZkcsQJuS\nzrFCZMd8GJiGTyoBdgV2KWXO9MGFwh4EzpJ0OnCHmT0Y14uWqSXflPQ9/N/KWvgk4rGaa2q51cze\ni/KywHmStsB1SzYqXTcm9r5B0kRgPeChChtq+ZaZTSg+SDoQuMnM3o3PN9IsklZV3yvqZwGzJJX3\ndZoEXCOpWK6sQ88JdE0RtCRJegrtpgTnhKTH8LaZ9Q/PxD3AHvhLVbgk/MW1DST1B74EnCbpXjM7\nreb8esCPgK3M7E25xHsRgDqb5mWU2qDUmaXyD4GXI1trKeCd0rmymFqt0FpbtPeWLbx5tdcW9XXF\n04Av4xOXPYCTJX3KzCoCugeXyk2keFqSJElLuks8LekZCMDM3o3YiWvwSck9wC8kXWNmMyNO6H38\nd/+qmV0j6Q3g0OinLIa2Mi4uNkPSmsAXgeFx3TRgq+h/rzbs6gv8K8oH4Usn7VEryNYeDwBD5Bvu\nLQXsCRyAT5qGhDeoqv7XuCfnKzR7lj5iZn+T9BCwL7Bi2FPD4AZNS5IkWTLpLvG0pGcwbz3BzCZK\n+oekfc3sOvkGeA9HHMQM/MW8ES4UNhd4Dyg8Yi3E0GJZ5Ql8YvFgabxfAJfGhGZEG3ZdANwo6SDg\nblp6USrtr7WhjeuK+50g6TJcqdWAi8xsEkAb9dfhGwxOxxVhkbQ0Hq+yMj7JO7tqQz6n5yyJpAha\nkiQ9hXbF05IkaYlSPC1JkqTDqIvE03okahYTK0S2PhKpq//XQNtjJD0u6UpJu8sF4+pde7Ckc7vW\n+spxhkcMSG397nF/EyU9FkGpSBqiDgqLSZoRP/tJmlKqvzb6P1bSYEmfb6OPhsYt/X6mSLpO8yHY\nNj+/g2i7VmfHTpIkSbqOxXn5ZqaZ1b7Enwca2UDwCGCgmb0Yn29v5/qF8rU5lhv+CGxtZi9JWgbP\nYOksVluOF/bWZrZRdZNOM+/3I+kq4HCgxYRRHXNJdPZ38G08m+jlTrZPkiRJuojF1lNCxaK/XLCr\n2JBtkKRLwwPxtKSjo/5CXIPlrvAMzPsWLhcYmxLelxGlrteRdJdcjOs3lcZIp0h6RC7i9YdS/XBJ\nZ8S5JyXtEPXLh4diqqS/0jrDBWAlPIDzNQAze9/M/lE6P0DSqLi/ed4LVYiptcE9wIfCq7Fj2RMS\ndj8W/fy2vXHb4AFgw/DQPCnp8vDUfFjSfvHMJkcga3EPh8TzHg3sUKofUnOvM0rln0Q/EyT9WtJe\nwNZ4HMl4ScvFPU2tuKckSZKkm1mcPSWFboeAZ8ysyBApf6PeGM/l7As8pdBkkbQbId4l6eBSm1OA\nXcMrURbW2hwXAns/+jnHzF6osedcM/slgFzx9MtmdkecW8rMPi3pi3haxy64t2ammX1S0qa4wFcL\nwr7bgOfk6qe3A9eWvAtrmdkO8kDXW4G/qo6YWuiUVK317QHcVvJqHBo/PwB8zcw2ic/l59Fq3Ip+\nC7G
3pfGsnruifiPgQDMbK2lt4AxgS+B1YJikPfDA1MFR/yYeaNvq+RSPKcb5Ip5ls42ZzZK0SojC\nHQ0cH8Gybd1TS+MXMfG0FEhLkmRxYHH2lLwde6NsWZqQ1HKHmc02s//iWRhrRn098a4HgcslfZeW\nE7r7zOytEON6HKhKdxgoabSkycDngE+WzhUv7XGltjsBVwGY2RRc1KsVZvY94PPAI7imyKWl0zfH\nNU/gCq7QUkxtPD4x68zSzBvAO5IukbQnLfVHqsatpZg0jgGeK9n9rJmNjfI2wHAzezW0Qq7Gn8un\nS/WzgesasHcgMCR+R5jZ66Vzxe+6rXuqwRapY/r05xp4BEmSJIs2i7OnpBHK4l1zaed5mNmRkrYB\ndgfGqTnwtE0RMEnLAecD/c3sxVgyKS/HzKrXttxNG3ZNBaZGbMYzwHcq7FLpZ6WYGh2IyzCzOZK2\nxV/2+wBHR7neuLW8XRvzE96H2pThqvZtiZ/NE3ULT9Cyda5r3Wnb91TD4FK5iRRPS5IkaUmKp7Wk\nyxfTzV4AACAASURBVP3rktaPb/FjJX0B3zG5EZbHX6T/lbQisDdwfTttRgL7AyMkfQrfp6XWnj54\nEOrfompL3OtQaX78rBJTe8/M/gOt1E+rysXYvYE+Zna3pIeBepsz1vs9NFI/Bjg7llXewHenPgfX\nHTlb0qq4uNs+wMRo8yweJ3ID8FVgmagfBpwS9/2OpFVjb51CqK14nr0buCdSPC1JkqRtUjytJR3N\nxmiVeVLBmZKKpY57zWyypC3bG9fM3pB0MTAVeIkQ62pnrAtx1dGpuHjZoxXXCPhxBM6+g3sZDq7T\nr4UtwyRtQmsxtf9Q/xlUlVcGblFzKu8P2xq3gnbrzexlSSfRLM52u5kVgcqDgdF4kO/EUvuLw64J\n+ARsZvR1j6TNgUclzQLuBH4GXA78QdLbeGzLrRX3VMGiF1OSJEnS00nxtCTpIErxtCRJkg6jJVk8\nLalGzaJlEyU9Kmm7dq5vIaTWifEqxdSi/pmw5VFJn+7sGNHfjPavqmy3eWTmJEmSJAuZnJQsecyM\nrKQtgJ/iKbft0V1ugRMi2PX/t3fe8XJVVf9+vgkQQglVCYKEACqdkACCtKtIVCCgCEgRkFfBV0RQ\nyisKypUiivyQaqEYescIBIRQAoHQQnoCoYYOEVEhFGlZvz/WmtxzJ2funRsm3JL1fD6XObPPPnvv\ns2fI2bP2Wt/1M+Dc6pPyrML1Mr9jHIRnSk6SJEk6mVyULHwUTWfLEJl4JS0p6fawWkwOPZDWF0pr\nhGVjiKRekk6Ri75NUsjbR72zJT0qaRS1Q4KLjAHWjGtHS/q9pIeAQ8NSc0f0cZukVaPe6pLui7Ge\nUOh7rkBevD9LngwQSZvKRd0mRXh2Pzyx4B5xX7tL2kYtqQnGh/NrkiRJ8jHQkx1dk3Iq+iB9gf64\nxgm4o+zXzexNSSvgTqQ3VC6S9FngSmA/M6vk2PlPiL4tBoyNRchgXJxtnRA/e4TW2ill7AwUt4gW\nNbPNot8bcH2RSyUdAJwFfAM4AzjHzC6TdHBVe/NYTeQS/FcCu5vZhIiCegf4JTDEzA4t9Hewmd0f\nEUb/LRuwUjwtSZKk4aSlZOGjIiq3Dh5tckmU9wJOljQZuB2Xlq9YOT6JC6LtbWbTomwosF9EuTwI\nLI+LsG0DXAFgZi8Dd7YxllNjgfQ9WrRVoLUY2haV9mKsFUn5LfFFRqW8PT4HvGRmE2Jsb5rZhyX1\nxgK/l/QjYLkQbSuh8wXTUjwtSZKeRlpKFmLM7AFJK0paEdgRWBHY2MzmSJpJi8Db63gyw62BGVEm\n4EdmdluxTUk7dmAIR5pZmQR9UUCtLV+Ryrmi2WKueFpQFKlr17xhZr+VNBKfj7GShprZ4/PWbC4c\nN5HiaUmSJK1J8bSkHuY+mEOvpBfwGu5f8o9YkHyR1lL57+JbJqMkv
WlmV+AaIAdLGm1mH4R+y4u4\nf8hBki7GZfu/iMvDzy/34aJpl+J6KvdE+b1RfhkuMlfhWWDd2K5ZEldkvQd4DOgvaYiZjS9s38wm\nxNNiTtYoKORuCqwNtLMoSZIkSapJ8bSkHhZXS6JCcB8Rk3QZcGNs3zyMC7bNJVRQd8IXJrPN7DxJ\nqwMT5A4W/8B9UkZI+hIuFPccvqgoo15RtUNxEbkjgVeBA6L8x8Dlkv4PuL4wzhckXQ1MA2YSifrM\n7H1J3wLOltQXeBv4MjAaODrm5GRg61iUfRj3UEkUWEXX8ylJkiTp7qR4WpJ0kBRPS5Ik6Tg9WjxN\n0pzYIqi87y3p1YiemJ/25lskLMJYZ0QY6cT4pd5eX3vNT19dAUn7SzqrpPyTkm6MkNvp4ZuBpJVr\nzUnM3eCyczXqzw35lTQsLCUdGmeSJEnSNenO2zdvAetL6hPp6LcHnv+IbX6Un797mdnEOusOBPam\nJapkLpJ614gK6WqUzdXxwCgzOwtAnkiwEoWzR6P7jjw4N9ZTN0mSJOn6dFtLSXAzHiUB7vQ49yEv\naQlJF4RI1nhJw6J8gKQxcpGwUpl1SevKRcEqcuxr1jGWeeZSLqV+Rgh2PakWufWTga2i/cPiF/31\nku7Aw3GRdKSkh6L/4wpt7lMY2x/l9Iq+psjFxA5ra6ByEbH7Yl7uDSfVimXhOkl/l/SYpN8Wrjkg\nyh6gJSy3mpWBFypvKuHDRSuUpMUlXRGWlL9SiI6RtH2M62FJV8l1QpD0VbkY28PAroX6cy0hcuGz\nqWGpuqswplVq3E+tvn4jaVrM+yltzWOSJEnSYMysW/7hKefXB64B+gATcY2MG+L8SbiuBnhkyWO4\nYNjiwGJRvhYwLo4HAFPi+Ezc8gFuTerTzlhG446hE+Lvt1E+HLgqjtcBnojjbSvjjPf7406hy8T7\n7YE/x7Fwa8BWeCTIDUDvOHcOHpEyGLdQVNrr1854lwJ6xfF2wLWFcTwZ5/sAzwCr4CJrz+JaJIvg\nkS9nlrQ7FM/aewcuYb9yydz+BDg/jjcA3o/xrwDcDfSNc/+HZ/HtE3OzRpRfVfiM96+MA5hS6K9f\nO/dTq6/lgRntzSOdLUpS+FtppQGWJEnSHQDM2nm2d+ftG8yVRVfHrSQ30TokYigwTNJR8X4xYDXg\nZTwCYxAeYfGZkqbvB46RS5qPMLMn6xjO3la+ffO3GOujahEjK+M2M3u9MPbt1RIls2SMcyNgCDBO\nkvAF1ixgJDBQ0hm49WhUO2NdFrg4LCRG6228O8zsTQBJ0/EFxSeA0WZWkaS/ipJ5M7NRkgYCX8Xz\nyUyobOEU2AZXY8XMpsqjfQA2B9bFtUEELIp/DmsDT5vZ01HvUuBA5uVe4KLwXSlqn5Tdz3Ilfd2H\n67G8I+l8/Ps0smzy4m5rn/oYmTWra0UBJUmSfBS69aIkuAH4Ha5etWKhXMA3zeyJYuXYCnnFzDaU\nJ3x7p7pBM7sitil2Am6WdJCZ3dXOOGo9Hd6tow60FgwTcLKZnVc19kOAC83smHk6lzYCvgJ8H/ff\n+G4bfZ0A3Glmu0oagFt6ysY7h5bvSF1PPzP7D660emU4pG5DhOXWQIXXUWa2T6uTfl/1iJ4dLNcV\n2QkYrxbn2bL7Ke0r+tsMtx7tDhwSxyU0F46bSPG0JEmS1ixs4mmVB9VfgH+b2XRJ2xbO34prXPwI\nQNIgM5uEb+VUHGL3A+bJRCtpoJnNBM6StBqwIXCXpNuBfc0dN2uNp54xzwaWbqPercDxki43s7ck\nfQrf5rgD+Juk083sVUnLRTtvAe+Za4Q8TsiuS/ohbi77Q1X7/XChM2jR/WiLB4HTo7838Qf2pHlu\nzvU9HjDXNFkaT7L3XFW1MbjY2V1hRdkwyh/ALVhrmtlT4eOxCq4gO6DwmZRGLclFz8bhVqSvAp9u\n435q9fUSsISZ3SLpfnzrpwbNb
TSfJEmSLGziaZUIjBeBs0vOn4A/SKfgi4GZeOK3PwDXyTPH3kJr\nC0WFPSTtiy8EXgZOCjP/mkRW3RIulfRO9PWqmQ1lXht/5f0UYI48b8yFuB9GSyWz2+Rqq/d7t8wG\nvh1bQMfiAma9gPeAH+JJ44ZHmQFHR1Nr49sa1fwO3+o4Ft+mqEVljl+R1Iw/zP9NyYIkGII/7N/H\nHX/PNVdPLSp7/THGOh33w3k4+vinpO8AV0jqE30fa2ZPSPo+brF6C1dnXarsnioOu8DtZjZF0sY1\n7qe0L3yer5dUcb79Se2p6RrbJimaliRJTyLF0+pE0nrAAWZ2ZGePpV7kmi27mtkHnT2WnoRSPC1J\nkqTDqA7xtFyUJB8JSR8Ck3HLyAfAIWb2QBv1BwAjzWyD+exvNB56/A5u4TjRypP6daTN44DZZnZa\nnfVzUZIkSdJB6lmUdOftm6Rr8JaZDQaQNBT4De17fX7UJ3pHhOqSJEmSbkJ3F09LOp/iqncZwudG\n0pKSbg9xssmSdi7UW1TSpZIekXS1XFDti5JGzG1U+rJcXK2MMqG6w0M8bYoK4nFtlB8jF1QbA3yu\nUH6oXNhtkqTLOzwbSZIkyXyTlpLko9I39FT64iJrX4ryd/CswW9KWgF3kq3kJfoc7p/zgKQLgIPN\n7DRJ50hawcxew6OCLqjR56WS/otbXLbDZfv3BzbFo6kelKu69m6jfA888mcxPGT54Wj7p8Dq5lmF\n+9W66XBA7jRWWmkAr7zyTKeOIUmSpNGkpST5qLxtZoPNbB3ga0Q4Mv7dOjnE0W4HPqUW8bjnCn4n\nl+JqtcS135a0DC6m9vcafe5tZhtHv/+O60eY2X/N7C3gOlwfpVb51lH+rpnNpmWxBO4fc7mkfXBx\nvRp0rpjrrFnP1h5akiRJNyUtJUnDCMvHipJWxHMSrQhsbGZzJM2kJc9NrVDpC3FJ/XeBa8xsTo2u\n2jNTqNCmSsqtjTZ2xBcuO+OqvuuXj6O5cNxEiqclSZK0Zn7E0zL6JvlISJptZkvH8dq4ONpKuGjd\nmmZ2WIiq3QGsTotmzBZm9qCk84BHzOz30cYNwMbAl83ssZL+RgNHmNmEQtnGeJ6hzfGtmQfwnEC9\n2in/PL59Mx74U2whDTCzZyUtGuNc18zeqBqDdb7MvMj/d5Mk6U5k9E3ycbC4WnL0AOxnZibpMuDG\n2L55GBdKqzAD+KGk4cB0XFCtwmXAimULkmCeJ7GZTZR0ITAuzp9rZpMB2ii/ChexmwU8FGWL4P4q\n/eJ+zqhekLTQ+T4lSZIkPY20lCRdCklnARPMbHhnj6UWqVOSJEnSceqxlKSja9JlkPQwsAHu/FoJ\n250WIcUT5An3Gt3ntpK2aHS7SZIkScfJ7Zuky2Bmm1SOJW0O7AAMMrMPJC2P+380miY8yeD9C6Dt\nJEmSpAOkpSTpqqwM/LOSt8fM/gWsKuk6AEm7SHpb0iKS+kh6KsrXkPR3SeMk3S3ps1G+oqRrJT0Y\nf1uE5P3/Aj8OS8yWknYLsbWJoWlSiqSG/fXvv/qCnckkSZJuQlpKkq7KKOCXkmbgkTtXAWOBjeL8\nVsBUXBhtUTyyBuBc4Ptm9pSkzXAn2u2AM4DTzOw+SZ8GbjWzdSX9iULeG3lW6aFm9nJb4mmNjL6Z\nNatrZBxOkiTpbHJRknRJzOwtSYNxobMvAVcCPwOeitDjzYDTgG3xcN97JC0JfAG4Ri2Sq4vG65eB\ndQrlS0laoqTre4GLJF0NfKREf0mSJEnHyEVJ0mWJEJcxwBhJU3HJ+DG4cux7uFLsRfg25FHx+u9K\ngsAqBHzezN5vVVglF29mB4dD7U7AeEkV1dgqmgvHTaR4WpIkSWtSPC3pMYQvyBwzezLen4An/LsW\nl6O/0MyOk3Q/8EkzWzPq3QucbmbXxvsNzWyKpEuBSWZ2apRvZGaTJR0O9DOz5ihfw8yejuMHgQP
N\nbErV2BosnpZCaEmS9HwyJDjpziyFb6NMkzQJWAc3TzwEfBK3mIALoBUXDfsA35Vn+Z2Gy8UDHAZs\nEuHF04DvR/mNwDcqjq7A7+QZhacAY6sXJC2oYX8phJYkSeKkpSRJOkiKpyVJknScTrWUSLpT0vZV\nZYdJOqfB/ewSjo/t1RsuadeS8m0l3djIMc0PkmaGFkex7C+SDqwq20XSzQ3uuy4BMUnHxXZHdfmA\n8PnoSJ8fhnViqqSrJC3e/lU129pfrgQ7v9f2n9++kyRJksaxILdvLgf2qirbM8obydeB9T5iG13h\nZ2/ZGK7g45nDJjxq5aPQ0Tl8y8wGm9kGwPu4XkgrCpEyC6L/Ct8BVpnPa5MkSZIGsiAXJdcBO8iT\nnBFCVSub2dh4f6Skh2Lv/7jKRZJ+IWmGpDGSLq/8Mi8TxYpf9zsDp8Sv7oGSvhftTpR0TdUv8O3j\n+hmSdqwesKQlJF0g6QFJ4yUNi/J15YJbE2K8a5Zc+4fod2rV/cyU1BztTVaLmNfykm6N+udRnuHt\nDuBzklaqjA8Pbf1bvN+nMK4/Vh7ikr4r6bG4j3MlnRnl9QqI7VSYg1GSPlEY0yBJ90X73yuZh16S\nTon2J1VbempwD7BWWFxmSLooLC+rStpL4eMh6TeFfg6o3COwZaG8lUVM0uzC8U+jnYmSfi3pm8Am\neBK+CXIRtt9Imh5jP6XWgJWiaUmSJI3HzBbYH3ADMCyOfwqcEsfbA3+OY+HOhlvhD4gJuLbEUsDj\nwOFR73ZgzTjeDLgjjocDuxb6XK5wfALww0K9m+N4LeB5XLZ8W+CGKD8J2DuOlwEeA/oCZwJ7Rfki\nQJ+Se102XnsBo4H14/1M4OA4/gGeqRZczOvYON4B+BBYvqTdM4EfxfG3gKvjeO2Y397x/hzg27gS\n6swYf2/cIfTMqHMZ8IU4/jTwSBwfV5nnyr0Xjr8L/K5Qb2LM2wrAc0B/YAAwJeocCPw8jhfDM/QO\nKLmv2YX5/BvueDog5mHTOLcy8CywfMzrHfgitH+hfBFcW6Ryj9Xfhzfi9WtRr0/V5zUa2DiOlwdm\nFK7tV+N7bWAN+MOSJEkWFuLfvDbXDQtap+RKfLvhxnj9nygfilstKinvlwQ+A/QDrjfXknhf4euh\ntkWxqtlA0onAstHurYVzVwOY2ZNyWfJqX5ShwDBJR8X7xYDV8Lwox0haFRhhEaZaxZ5hFVgEf2iu\nC0yLcyPidTzwjTjepnJsZjdLKtHCAHwOfwechc/hxVG+HTAYGBdzsjgwC5gN3GVmrwNIugafW6hf\nQOzTcvGwlfF5nlk4d72ZvQe8JulOfIE4uXB+KP4Z7B7v+0X/z1b10Tc+f3BLyQX4NsozZjYuyjcF\nRptLzCPpMnzeVFV+VeEea7EdMNzM3gUws/8UzlXm43XgHUnnAzcBI9tpM0mSJGkgC3pRcj1wmqSN\ngb5mNjHKBZxsZucVK0s6rEY7bYliVXMhsLOZTZO0P24JqVD0OxDz+iEI+KaZPVFVXtkm2Am4WdJB\nZnZXYdyrA0cAQ8zsDUnD8UVChXfj9UNqz3mp/4S5LPrKkjYEtsCtJZX6F5nZMa0akXap1RZ1Cojh\nC6BTzewmSdviFpK5Q6pqr2wOf2Rmt9UYQ4W3qz/PGMdbJe1VYzXKAT4gtiVj8VV3Ej8z+1AuTb8d\nsDtwSByX0Fw4biLF05IkSVozP+JpC1SnxMzeAu4C/oI7bVa4FfifsIAg6VPhtzAWt1T0kbQUvgjA\nzGYDMyXtVmkgHtLgloFijpKlgFckLYprVhTZXc6awEB8e6bIrcChhT4GxetAM5tpZmfhC60Nq67r\nh2eanS33//ha2zMD+LbKPtH+13DLTi2uxpVL/x5WCvCtjN0q/h6SlpO0Gr5dso2kZeT+PN8stDMK\n1+uo3F8lj0z1HPYDXorj/avGsoukxSStgC/4xlWdvxU4WC2
+RJ+R1LfkntpaOFV4KO5leUm9caff\nuwvly8XnvHvhmmfwbUCAXWixqN0GHFAZi6TlovyNuN+KRW5ZM7sFOJx5P+cCzYW/ptrVkiRJFlKa\nmppobm6e+1cPH4d42hX4P+5zFyXxK/py4H65SNU1wFJm9jDuJzEZN59PwU3q4P4SZaJYVwJHyZ0y\nBwK/wB9a9wCPVo3luTh3E5607b2q8ycAi4Yz5DTg+CjfQy7iNRGP9Lm4eJG5wNak6O9S3Hdh7uka\n8/Ir/ME6FY8geq5GPWiZw7lRN2b2KHAsMErSZHzB0d/MXgJ+XZiDmbTMYb0CYs3AtZLGAa9WjWUK\nvtC8DzjezF6pOn8+8AgwIe7tT5Rbh2rNy9zyaPvo6G8iMM7MbozyZjwJ3z3RX4XzgG3js9qcsLyY\n2a34d+vh2DY6IupfBPwpypYCRsZ8jgF+UmOMpGhakiRJ4+ly4mmSljRPxtYXfzAcaGaTOntc3YnC\nHPbG/VkuMLPrO3tcPQWleFqSJEmHUTeVmT83fuWOB65ZkAsSSceEBWRyWAk2XVB91Tmez8tDcSfK\nw1J/OZ9NNcccTgWe7moLEkmj5RmAq8t7y0N1H4/PY4KknzWw35GS+rVfM0mSJOkMulyWYDOr9gNZ\nIEjaHA/FHWRmH8jVVOt2ilxAXATsFk66Aj43P42Y2VHt1+qSnITntVnPzN4PH48jyipqPswVZrZT\nA8aYJEmSLCC6oqXk42Jl4J9m9gGAmf2r4h8habv4lT5Z0vnhTNlKCl7SEEmj4/g4SRerSlRMUn+5\n0NuE8FPZsnQkLXwCD+utCFjMiHY2jbbHS7pX0meivJW8uqQbJW0Tx1+N+hMl3RZl8y0OV0QucPdg\n3NOfCuWj5eJjD8pF0LaM8sUlXRHWn7/SOjKpcm1f4HvAIZXoIDN7y8yOj/NlwmrzCNZJ+oo8nLnS\n7raSbij5/OYRnpMLvw2P+5qs2tFgKZCWJEmyAFiYFyWjgNXiQXdO4WHeBxfg2t3MNsKjN34Q11T/\nMi++34AWufZfyvOp7A3cEqGvG+HOsG1xOh5+fJ2kg2Is4A60W5nZEDw89+QaYyDuYUXgXOAbZrYx\nLdEpx+Cic5sDXwJOjcXA/wKnxzg3AV5oZ5xnmdnnzWxDYAm1VsftbWafx51Em6PsB7is/Hox/k2Y\nl7WAZ83s7Tb6XQs428w2MLPncZG2zfC5bZK0Pi6yt5laIn6+RYuTtcX8rB3lX4h7noNHQg0CVjGz\nDeOzH157KFbX36xZ1fIsSZIkSS0W2kVJhCsPBg7CI0yulLQfvmXytJk9FVUvwgW7oHYYK4SomJm9\nBlRExcbhoc+/BDaMPtsa0wnAEHzBtBdwS5xaFo+GmQr8Hhdma4vNgbvN7LlotyIUNhQ4Wu5vchfz\nisMdBaxeERhrg+3C2jIF+CKtcw/9NV7H4wqt4PN3aYxlKq3F1kqR9J2w8jwnqZKb5tmCsBq4YN14\nPDJnXWBdM/sQn7dhckffHfEw7lbjp0V4biK+QFsDeBoYKOkMSV/BQ6WTJEmSj4ku51PycRI+CWOA\nMfHA3w+3ZrQrzMW8WxDziIqZ2T2StsYfjBdK+n9mdmk7Y5oJ/FmuKvqqXE/jBOBOM9tVnqtmdMl4\nqsdU6x46LA5XJKw35wCDzeyl2DZphFDck7jlasnYtrkQn7MpuFw+FITV1LZg3VW48Nm/8TDiautL\nqfBctLsR8BU8XHoPXGa/hObCcROpVZIkSdKaLiee1pWRJ/Rbq1A0CJdCfwwYIGmNKN8XtyqAa34M\nieOiKBmUiIrJxcz+YWYX4Podg6PviyTNs4UhaYfC28/ii47/4HlsXozyAwp1nsET5EnSp3HrDLh+\nx9axgCkKhXVIHE7S7ZJWrhrm4vgC7DW5wN1utE9RKG59SkTJzOwdXGr+7Mq2VVg6is7HxcVMW4J1\nd+NzfSCuY1N9fanwXHx
2vc1sBK53s3HtW2omxdOSJElqMz/iaQuzpWQp4CxJy+AP/yeBg8zsXUkH\n4NslvfEtmD/HNccDF0h6nZaFSoWKqNgKhKhYbAcdJel9fCtgv6i7IS2KqUX2lXQa8HaMaW8zM3m2\n2oskHYsLvwFgZmMlPQNMx/1Oxkf5PyUdBIyQJOAf+K//E4HTw/rQC9+u2BkXh9sXeB94GTgprlsT\n+FdxgGb2ujyr8fSo+1DxdMk9AfwRGC6pMs6Ha9Q7FrcKTZP0BvAOvn32Ep4XpyisNkVSRbDueQqC\ndWY2R9JIXI12v5bm/XozezTmcpSkXsB7wA+B/8Y4e0Xdo2uMk7Z38lpIgbQkSZL66XLiad2R2MKY\nbWan1VF3aeB8M/tWe3U7E0nrAQeY2ZGdPZauhlI8LUmSpMOom4qn9Wgij89u8lDUifG6WkfakHRu\nRJCgBoqLVY1zetmCRB4yu2uN8qfVInp2SEf6kzRM0v/F8S6V+yupd5ykw9tpa4ik0zvSf5IkSdL5\npKWkE5D0hpnVVBaV1DuiSOppa7aZLd240bXb33DgRjP7a0n5DeGPUevaXmY2p84+RprZdSXn6rZK\nLSjSUpIkSdJx0lLSdZnnQ5ELoV0v6Q7gdrno142F82eFj8pcmXZJJwN9wzJxiVwcbWRYYKZI2r2k\nn+/JBccmSrpG0uJRPlweCjtW0pNFa4iksyU9KmkUrrhai3m+T5JmSzpVHnq7hWoL0O0f97gF7udy\nStzXwJqTWJCrl7SCpJlxPHfuJC0p6S8xH5MkfSPKt5cL0j0s6SpJS0T5b+SpByaFL0+tvlMkLUmS\npMHkoqRzqCwkJkoqWgM2BnY1sy/G+zZ/jpvZz4C3zWywme0LfBV40cw2DmGzW0ouu87MNgtRtRm0\nDnntb2ZbAsOA3wLE4uQzZrYO7jj6hTaGdEphS6qiXbIkcH+MaWzJPRXfm5ndj2fzPSrua2Zbc9BW\nW/H6C+A/IYg2CLhTHmVzLLCdmW2COwgfHoulr5vZ+lH3xLa7SpG0JEmSRrIwR990Jm+Hkmg1t5nZ\n6x+h3am4SuvJwE1mdm9JnQ0knYgLsi2JhwlX+BvMjU6pWES2JhRRzexlSXe20f9R1ds6eBRRsay+\nsJXG8WVcvRWYGz20Iy62NjaijBYF7gNeB96Ra8TcBIys3Wxz4bip0WNOkiTp9syPTkkuSroWRcXX\ntoTRisx9yJvZE7GdsQNwoqTbzaz61/6FwM7mSf/2xzVVKhSVXBu1ePhvlQNGWwJ0HWV+2xIwykqS\nP0raDFd83R0XYNuuvInmDnSXJEmy8NHU1ERTU9Pc97/61a/avSa3bzqHeh74zwLrSlpU0rLUfDjy\nnlxPBbnQ2TtmdjnwO0KsrYqlgFfkSQbbyshcGeMY4FvyZHUr47LyHaH6XtsSoKswGxdHa49naMmj\nM4//THAbrkHig/G5fADYUpF4MHxxPiPPSrysmd0CHE6JyFuSJEmy4MhFSefQbuiGmb0AXA1Mw1VJ\nJ9S4/lxgqqRL8KSAD4VT6S8p94n4BS54dg8uPFZrTBWhsRG4sNx03MpyXwfvqbr8eOBMSQ/hlo4y\nrsRF58aXOLouQotF51TgB/L8N8vXaOtEYHl5JuGJQJOZ/RP4DnCFpMlxT58DlgZGRtkYPKlgDdTq\nL0XSkiRJPjoZEpx0KyT9FTg3rBmdNYYMCU6SJOkgGRKcdAqSPowInEkRcrt5g9qdgltXRjWi3Ekq\n7wAACQxJREFUvSRJkqRrkZaSpOEUxeEkDQV+bmZNC7C/j9V0kZaSJEmSjpOWkqSzKH7plqGQ1E/S\nkSHeNkmuzoqkkyUdXKgzV0q+Rv0BkmbIsy1PBVaV9IeoN7VSL+ruEMJv40IcriKqtoSkCyQ9EL4r\nw6J8XUkPFiw9a5beYAqmJUmSNJy0lCQNR9IHeNbkvkB/4EtmNlHS9sBuZvb90Ae5ARdpe
xM4vWJN\nkWcTHopriZTVfx54CtjCzMbFNcua2X/kGX7vAH4EPBF/W5nZc5IuB5Yys50lnQRMN7PL5ZmiHwIG\nRfv3m9kVkhYBeptZMVQaSeb+uyL//0mSJKmPeiwlqVOSLAjmisOFP8klwPr4QmN7SRNwa8qSuFrs\ncEmfkNQfl7H/l5m9KOnHZfXxRcmzlQVJsKekA/HvdH98QdMbeMrMnos6VwAHxvFQYJiko+L9YsBq\nwP3AMZJWBUaY2ZPlt9js/21unicWP0mSJJk/8bS0lCQNR1UJByW9gi9KjgYeM7PzSq5pBl7DFxQv\nm9nZkk4tqy9pAJ4UcMN4vzquRzLEzN6QJ/QbDUwGzihYYIYBB4al5GFgLzN7omQsA4GdcGvLQWZ2\nV9X5tJQkSZJ0kPQpSTqLuV86SWvj37PXcEn7/wmRMiR9StInourVwJ64oNo1UdZW/eIXux++BTRb\n0krA16L8MWCgpNXi/bcK19wKHFoY56B4HWhmM83sLOB6UkAtSZLkYyO3b5IFweKFLReA/SJc5bZY\npNzvLiLMBr4NvGpmj0haGnjBzGYBmFmt+nMoiLKZ2RRJk3AxuOeBe6P8v+FAe6ukN4FxhetOAE6P\nMONewNN4duI9JO0LvA+8DJxUfospmJYkSdJocvsmaRdJH+JbIcIf6l8v+GlU1x0AfMHMrvgYxzcT\n37r5V1V5P+CPwGZR9AEw3MxOkUvmn2Fme0jaFjjSzIbV2V+GBCdJknSQdHRNGsVbNbIalzEQ2JvI\nLPwxUWuFcAHuHPsm7sj6AbFAMbOXgT3qaCNJkiT5mMhFSVIP86xswyJyCbBEFB1iZg8AJwNrx/bN\nRbgD6nBgUXyb5Jtm9lRVW3/AE+v1Ba41s19F+cxoYxj+Xd3dzB6XtDy+6PkUnlyvbHxr4gkJ16qY\nNSJc+MlwZJ0DjDSzDaqu2xY4HV+kGLCNmb1FkiRJssBJR9ekHvqGmNhESddF2Szgy2a2Ce6gelaU\nHw3cY2aDzewM4H9xDZLB+MLjhZL2f25mmwEbAU2S1i+c+4eZDQH+BBwZZcdFHxsAI/BQ3mrWBSYV\n91nMbA4wCVivUlRy3RHAwTHerYF3yiYkhdOSJEkaT1pKknp4u2T7ZjHg7Iha+RDXDymjHt2PMo2R\naXFuRLyOB74Rx9tUjs3sZkn/no97qsVY4PeSLgP+amYvllWaNevZBnaZJEmSQC5KkvnnJ8ArZrah\npN7UsCiEMuoDuO7HzZJa6X6ExsgRtNYYWbzQREVN9UNqf1/LHKcewRVaWyp5CM+gOFeKmf1W0khg\nR2CspKFm9nhZ3ebmZoAUT0uSJClhfsTTcvsmqYeyh/4yeMgswH64eip42O7Scy9sX/ejlsZIW4wB\n9on2vwYsW10h/FYmSvpFofgXwHgze7pWw5LWMLPpZnYKHkK8dq26zc3NcxVdexId/UekO9GT7w3y\n/ro7Pe3+mpqa5v47WfkR1x65KEnqocz34g/AdyRNBD4LVJxBpwBzwv/kMFz3Y1rUWw+4uFXDZlNw\nP49HgUsJjZE2+gX4FbBNJOP7OlAangx8F/ispCclPQGsFWVt8eNI6jcJeA/4ezv1exw97R/GIj35\n3iDvr7vT0++vHnL7JmmXomR8oexJ3DG1ws+i/ANgu6rqv22n/QNqlK9ROB4PfCmO/wV8pY5xvw7s\nW+Pcs4TVxszuBu6O40PL6leTwmlJkiSNJy0lSTIfvPLKM509hCRJkh5HKromSQfxhHxJkiRJR2lP\n0TUXJUmSJEmSdAly+yZJkiRJki5BLkqSJEmSJOkS5KIkSepE0lclzZD0uKSfdvZ4GomkCyTNkjSl\ns8eyIJC0qqQ7JU2PkO+6oqy6C5L6SHowQvGnS/p1Z4+p0UjqFekubujssTQaSc9Imhyf30OdPZ5G\nI2kZSddIejS+n5+vWTd9SpKkfSKZ3+N4uPNLuLDan
mY2o1MH1iAkbYWL2F1sZtUCd90eSf2B/mY2\nSdJSeNqCXXrK5wcgaQkzezsUlscCR5jZ2M4eV6OQ9BNgCNDPzHbu7PE0EklP46rWjUyZ0WWQdCFw\nt5kNl7QIsISZvVFWNy0lSVIfmwFPmNmzZvY+cCWwSyePqWGY2b1Aj/wHEcDMXjGzSXH8Ji7Wt0rn\njqqxmNnbcdgH/7e9x3yekTtrB+D8zh7LAkL00OexpH7A1mY2HFzLqtaCBHroJCTJAmAV4PnC+xfo\nYQ+1hYXItzQIeLBzR9JYYntjIvAKcJeZ1czx1A35PXAUtVWeuzsG3CZpXCQn7UkMBP4paXhsv50r\nqW+tyrkoSZJkoSG2bq4FDguLSY/BzOaY2cbAqngahm07e0yNQNKOwKywdInyXFzdnS0jE/sOwA9j\nO7WnsAgwGDgn7vFt4OhalXNRkiT18SKwWuH9qlGWdBNiL/ta4BIzu76zx7OgCNP4TcAmnT2WBrEl\nsHP4XVwBfFHSxe1c060ws5fj9VVgBL5d3FN4AXjezB6O99fii5RSclGSJPUxDlhL0gBJiwF7Aj0t\nCqCn/gqt8BfgETM7o7MH0mgkrShpmTjuC2yPJ7rs9pjZz81stciFtSdwp5nt19njahSSlggLHpKW\nBIYC0zp3VI3DzGYBz0v6bBRtB9TcWsyEfElSB2b2oaRDgFH4Yv4CM3u0k4fVMCRdDjQBK0h6Djiu\n4pjWE5C0JbAPMDX8Lgz4uZnd0rkjaxgrAxdJqjhMXmJmd3TymJL6WAkYEekrFgEuM7NRnTymRnMo\ncJmkRYGngdIkrJAhwUmSJEmSdBFy+yZJkiRJki5BLkqSJEmSJOkS5KIkSZIkSZIuQS5KkiRJkiTp\nEuSiJEmSJEmSLkEuSpIkSZIk6RLkoiRJkiRJki5BLkqSJEmSJOkS/H9nSGE8e3zuuwAAAABJRU5E\nrkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "result['Zinc, Zn'].sort_values().plot(kind = 'barh')" ] }, { "cell_type": "code", "execution_count": 249, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 249, "metadata": {}, "output_type": "execute_result" } ], "source": [ "by_nutrients = ndata.groupby(['nutgroup', 'nutrients'])\n", "by_nutrients" ] }, { "cell_type": "code", "execution_count": 250, "metadata": { "collapsed": false }, "outputs": [], "source": [ "get_maximum = lambda x: x.xs(x.value.idxmax())\n", "get_minimum = lambda x: x.xs(x.value.idxmin())" ] }, { "cell_type": "code", "execution_count": 251, "metadata": { "collapsed": false }, "outputs": [], "source": [ "max_food = by_nutrients.apply(get_maximum)[['value', 'food']]" ] }, { "cell_type": "code", "execution_count": 252, "metadata": { "collapsed": false }, "outputs": [], "source": [ "max_food.food = max_food.food.str[:50]" ] }, { 
"cell_type": "code", "execution_count": 253, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "nutrients\n", "Alanine Gelatins, dry powder, unsweetened\n", "Arginine Seeds, sesame flour, low-fat\n", "Aspartic acid Soy protein isolate\n", "Cystine Seeds, cottonseed flour, low fat (glandless)\n", "Glutamic acid Soy protein isolate\n", "Glycine Gelatins, dry powder, unsweetened\n", "Histidine Whale, beluga, meat, dried (Alaska Native)\n", "Hydroxyproline KENTUCKY FRIED CHICKEN, Fried Chicken, ORIGINA...\n", "Isoleucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Leucine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Lysine Seal, bearded (Oogruk), meat, dried (Alaska Na...\n", "Methionine Fish, cod, Atlantic, dried and salted\n", "Phenylalanine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Proline Gelatins, dry powder, unsweetened\n", "Serine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Threonine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Tryptophan Sea lion, Steller, meat with fat (Alaska Native)\n", "Tyrosine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Valine Soy protein isolate, PROTEIN TECHNOLOGIES INTE...\n", "Name: food, dtype: object" ] }, "execution_count": 253, "metadata": {}, "output_type": "execute_result" } ], "source": [ "max_food.ix['Amino Acids']['food']" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }