{ "metadata": { "name": "", "signature": "sha256:46cdb303e7e791d8c7056413f93df8cd9403aea2aae02bf3546fa083d0d4fcf9" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "%pylab inline" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "from pandas import Series, DataFrame" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Introduction to pandas Data Structures" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Series" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series([4, 7, -5, 3])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 5, "text": [ "0 4\n", "1 7\n", "2 -5\n", "3 3\n", "dtype: int64" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "array([ 4, 7, -5, 3])" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": true, "input": [ "obj.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ "Int64Index([0, 1, 2, 3], dtype='int64')" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2 = Series([4,7,-5,3], 
index = ['d','b','a','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 10, "text": [ "d 4\n", "b 7\n", "a -5\n", "c 3\n", "dtype: int64" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 11, "text": [ "array([ 4, 7, -5, 3])" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ "Index([u'd', u'b', u'a', u'c'], dtype='object')" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2['a']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "-5" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2[['c','a','d']]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 14, "text": [ "c 3\n", "a -5\n", "d 4\n", "dtype: int64" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2[obj2>0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ "d 4\n", "b 7\n", "c 3\n", "dtype: int64" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2*2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "d 8\n", "b 14\n", "a -10\n", "c 6\n", "dtype: int64" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ 
"np.exp(obj2)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 17, "text": [ "d 54.598150\n", "b 1096.633158\n", "a 0.006738\n", "c 20.085537\n", "dtype: float64" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "'b' in obj2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 18, "text": [ "True" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "'e' in obj2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ "False" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "sdata = {'Ohio':3500,'Texas':71000,'Oregon':16000, 'Utah':5000}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "obj3 = Series(sdata)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "obj3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ "Ohio 3500\n", "Oregon 16000\n", "Texas 71000\n", "Utah 5000\n", "dtype: int64" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "states = ['California','Ohio','Oregon','Texas']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "obj4 = Series(sdata, index=states)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "obj4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ "California NaN\n", "Ohio 3500\n", "Oregon 16000\n", "Texas 71000\n", "dtype: 
float64" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.isnull(obj4)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ "California True\n", "Ohio False\n", "Oregon False\n", "Texas False\n", "dtype: bool" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.notnull(obj4)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 27, "text": [ "California False\n", "Ohio True\n", "Oregon True\n", "Texas True\n", "dtype: bool" ] } ], "prompt_number": 27 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Series also has instance methods." ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj4.isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 28, "text": [ "California True\n", "Ohio False\n", "Oregon False\n", "Texas False\n", "dtype: bool" ] } ], "prompt_number": 28 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Automatically aligns differently indexed data in arithmetic operations." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "obj3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 29, "text": [ "Ohio 3500\n", "Oregon 16000\n", "Texas 71000\n", "Utah 5000\n", "dtype: int64" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "obj4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ "California NaN\n", "Ohio 3500\n", "Oregon 16000\n", "Texas 71000\n", "dtype: float64" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "obj3 + obj4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 31, "text": [ "California NaN\n", "Ohio 7000\n", "Oregon 32000\n", "Texas 142000\n", "Utah NaN\n", "dtype: float64" ] } ], "prompt_number": 31 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "name attribute" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj4.name = 'population'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "obj4.index.name='state'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "obj4" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 34, "text": [ "state\n", "California NaN\n", "Ohio 3500\n", "Oregon 16000\n", "Texas 71000\n", "Name: population, dtype: float64" ] } ], "prompt_number": 34 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "index altered in place" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj.index = ['Bob','Steve','Jeff','Ryan']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, 
"input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 36, "text": [ "Bob 4\n", "Steve 7\n", "Jeff -5\n", "Ryan 3\n", "dtype: int64" ] } ], "prompt_number": 36 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "DataFrame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = {'state':['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],\n", " 'year':[2000, 2001, 2002, 2001, 2002],\n", " 'pop':[1.5, 1.7, 3.6, 2.4, 2.9]}\n", "frame = DataFrame(data)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
popstateyear
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 38, "text": [ " pop state year\n", "0 1.5 Ohio 2000\n", "1 1.7 Ohio 2001\n", "2 3.6 Ohio 2002\n", "3 2.4 Nevada 2001\n", "4 2.9 Nevada 2002" ] } ], "prompt_number": 38 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "column that is not contained in the data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2 = DataFrame(data, columns = ['year','state','pop','debt'],\n", " index=['one','two','three','four','five'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 39 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 40, "text": [ " year state pop debt\n", "one 2000 Ohio 1.5 NaN\n", "two 2001 Ohio 1.7 NaN\n", "three 2002 Ohio 3.6 NaN\n", "four 2001 Nevada 2.4 NaN\n", "five 2002 Nevada 2.9 NaN" ] } ], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.columns" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 41, "text": [ "Index([u'year', u'state', u'pop', u'debt'], dtype='object')" ] } ], "prompt_number": 41 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "column retrieved" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2['state']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 42, "text": [ "one Ohio\n", "two Ohio\n", "three Ohio\n", "four Nevada\n", "five Nevada\n", "Name: state, dtype: object" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.year" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 44, "text": [ "one 2000\n", "two 2001\n", "three 2002\n", "four 2001\n", "five 2002\n", "Name: year, dtype: int64" ] } ], "prompt_number": 44 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "rows retrieved" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.ix['three']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 45, "text": [ "year 2002\n", "state Ohio\n", "pop 3.6\n", "debt NaN\n", "Name: three, dtype: object" ] } ], "prompt_number": 45 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.ix[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 47, "text": [ "year 2002\n", "state Ohio\n", "pop 3.6\n", "debt NaN\n", "Name: three, dtype: 
object" ] } ], "prompt_number": 47 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "columns modified by assignment" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2['debt']=16.5" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 48 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebt
one 2000 Ohio 1.5 16.5
two 2001 Ohio 1.7 16.5
three 2002 Ohio 3.6 16.5
four 2001 Nevada 2.4 16.5
five 2002 Nevada 2.9 16.5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 49, "text": [ " year state pop debt\n", "one 2000 Ohio 1.5 16.5\n", "two 2001 Ohio 1.7 16.5\n", "three 2002 Ohio 3.6 16.5\n", "four 2001 Nevada 2.4 16.5\n", "five 2002 Nevada 2.9 16.5" ] } ], "prompt_number": 49 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2['debt']=np.arange(5.)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 50 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebt
one 2000 Ohio 1.5 0
two 2001 Ohio 1.7 1
three 2002 Ohio 3.6 2
four 2001 Nevada 2.4 3
five 2002 Nevada 2.9 4
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 51, "text": [ " year state pop debt\n", "one 2000 Ohio 1.5 0\n", "two 2001 Ohio 1.7 1\n", "three 2002 Ohio 3.6 2\n", "four 2001 Nevada 2.4 3\n", "five 2002 Nevada 2.9 4" ] } ], "prompt_number": 51 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "assign series to a column" ] }, { "cell_type": "code", "collapsed": false, "input": [ "val = Series([1.2, 1.5, -1.7], index = ['two', 'four','five'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 52 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2['debt']=val" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 53 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 1.2
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 1.5
five 2002 Nevada 2.9-1.7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 54, "text": [ " year state pop debt\n", "one 2000 Ohio 1.5 NaN\n", "two 2001 Ohio 1.7 1.2\n", "three 2002 Ohio 3.6 NaN\n", "four 2001 Nevada 2.4 1.5\n", "five 2002 Nevada 2.9 -1.7" ] } ], "prompt_number": 54 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "assigning a column that does not exist" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2['eastern'] = frame2.state=='Ohio'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 55 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearstatepopdebteastern
one 2000 Ohio 1.5 NaN True
two 2001 Ohio 1.7 1.2 True
three 2002 Ohio 3.6 NaN True
four 2001 Nevada 2.4 1.5 False
five 2002 Nevada 2.9-1.7 False
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 56, "text": [ " year state pop debt eastern\n", "one 2000 Ohio 1.5 NaN True\n", "two 2001 Ohio 1.7 1.2 True\n", "three 2002 Ohio 3.6 NaN True\n", "four 2001 Nevada 2.4 1.5 False\n", "five 2002 Nevada 2.9 -1.7 False" ] } ], "prompt_number": 56 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "delete columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "del frame2['eastern']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 57 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.columns" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 58, "text": [ "Index([u'year', u'state', u'pop', u'debt'], dtype='object')" ] } ], "prompt_number": 58 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "nested dict of dicts" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pop={'Nevada':{2001:2.4, 2002:2.9},\n", " 'Ohio': {2000:1.5, 2001:1.7,2002:3.6} }" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 59 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3 = DataFrame(pop)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 60 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 61, "text": [ " Nevada Ohio\n", "2000 NaN 1.5\n", "2001 2.4 1.7\n", "2002 2.9 3.6" ] } ], "prompt_number": 61 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "transpose" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.T" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
200020012002
Nevada NaN 2.4 2.9
Ohio 1.5 1.7 3.6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 62, "text": [ " 2000 2001 2002\n", "Nevada NaN 2.4 2.9\n", "Ohio 1.5 1.7 3.6" ] } ], "prompt_number": 62 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that the keys in the inner dicts are unioned and sorted to form the index in the result. This is not true if an explicit index is specified." ] }, { "cell_type": "code", "collapsed": false, "input": [ "DataFrame(pop, index=[2001, 2002, 2003])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
2001 2.4 1.7
2002 2.9 3.6
2003 NaN NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 63, "text": [ " Nevada Ohio\n", "2001 2.4 1.7\n", "2002 2.9 3.6\n", "2003 NaN NaN" ] } ], "prompt_number": 63 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Dicts of series" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pdata = {'Ohio':frame3['Ohio'][:-1],\n", " 'Nevada':frame3['Nevada'][:2]}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 64 }, { "cell_type": "code", "collapsed": false, "input": [ "DataFrame(pdata)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NevadaOhio
2000 NaN 1.5
2001 2.4 1.7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 65, "text": [ " Nevada Ohio\n", "2000 NaN 1.5\n", "2001 2.4 1.7" ] } ], "prompt_number": 65 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.index.name='year'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 66 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.columns.name = 'state'" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 67 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateNevadaOhio
year
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 68, "text": [ "state Nevada Ohio\n", "year \n", "2000 NaN 1.5\n", "2001 2.4 1.7\n", "2002 2.9 3.6" ] } ], "prompt_number": 68 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 69, "text": [ "array([[ nan, 1.5],\n", " [ 2.4, 1.7],\n", " [ 2.9, 3.6]])" ] } ], "prompt_number": 69 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "different types of columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 70, "text": [ "array([[2000, 'Ohio', 1.5, nan],\n", " [2001, 'Ohio', 1.7, 1.2],\n", " [2002, 'Ohio', 3.6, nan],\n", " [2001, 'Nevada', 2.4, 1.5],\n", " [2002, 'Nevada', 2.9, -1.7]], dtype=object)" ] } ], "prompt_number": 70 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Index Objects" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(range(3), index=['a','b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 71 }, { "cell_type": "code", "collapsed": false, "input": [ "index = obj.index" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 73 }, { "cell_type": "code", "collapsed": false, "input": [ "index[1:]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 74, "text": [ "Index([u'b', u'c'], dtype='object')" ] } ], "prompt_number": 74 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "immutable" ] }, { "cell_type": "code", "collapsed": false, "input": [ "index[1]='d'" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "'' does not support mutable operations.", "output_type": "pyerr", "traceback": [ 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mindex\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'd'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/core/base.pyc\u001b[0m in \u001b[0;36m_disabled\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0;34m\"\"\"This method will not function because object is immutable.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m raise TypeError(\"'%s' does not support mutable operations.\" %\n\u001b[0;32m--> 182\u001b[0;31m self.__class__)\n\u001b[0m\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0m__setitem__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__setslice__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__delitem__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m__delslice__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_disabled\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: '' does not support mutable operations." 
] } ], "prompt_number": 75 }, { "cell_type": "code", "collapsed": false, "input": [ "index = pd.Index(np.arange(3))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 76 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2 = Series([1.5,-2.5,0], index=index)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 77 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2.index is index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 78, "text": [ "True" ] } ], "prompt_number": 78 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Index objects are sets" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateNevadaOhio
year
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 79, "text": [ "state Nevada Ohio\n", "year \n", "2000 NaN 1.5\n", "2001 2.4 1.7\n", "2002 2.9 3.6" ] } ], "prompt_number": 79 }, { "cell_type": "code", "collapsed": false, "input": [ "'Ohio' in frame3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 80, "text": [ "True" ] } ], "prompt_number": 80 }, { "cell_type": "code", "collapsed": false, "input": [ "2003 in frame3.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 81, "text": [ "False" ] } ], "prompt_number": 81 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.index-frame2.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 82, "text": [ "Int64Index([2000, 2001, 2002], dtype='int64')" ] } ], "prompt_number": 82 }, { "cell_type": "code", "collapsed": false, "input": [ "frame3.index.diff(frame2.index)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 85, "text": [ "Int64Index([2000, 2001, 2002], dtype='int64')" ] } ], "prompt_number": 85 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Index methods: append, diff, intersection, union, isin, delete, drop, insert, is_monotonic, is_unique, unique" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Essential Functionality" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Reindexing" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series([4.5,7.2,-5.3,3.6], index=['d','b','a','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 86 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 87, "text": [ "d 4.5\n", "b 
7.2\n", "a -5.3\n", "c 3.6\n", "dtype: float64" ] } ], "prompt_number": 87 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2 = obj.reindex(['a','b','c','d','e'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 88 }, { "cell_type": "code", "collapsed": false, "input": [ "obj2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 89, "text": [ "a -5.3\n", "b 7.2\n", "c 3.6\n", "d 4.5\n", "e NaN\n", "dtype: float64" ] } ], "prompt_number": 89 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.reindex(['a','b','c','d','e'], fill_value=0)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 90, "text": [ "a -5.3\n", "b 7.2\n", "c 3.6\n", "d 4.5\n", "e 0.0\n", "dtype: float64" ] } ], "prompt_number": 90 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 91, "text": [ "d 4.5\n", "b 7.2\n", "a -5.3\n", "c 3.6\n", "dtype: float64" ] } ], "prompt_number": 91 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "filling of values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj3 = Series(['blue','purple','yellow'], index=[0,2,4])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 92 }, { "cell_type": "code", "collapsed": false, "input": [ "obj3.reindex(range(6))" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 93, "text": [ "0 blue\n", "1 NaN\n", "2 purple\n", "3 NaN\n", "4 yellow\n", "5 NaN\n", "dtype: object" ] } ], "prompt_number": 93 }, { "cell_type": "code", "collapsed": false, "input": [ "obj3.reindex(range(6),method='ffill') # forward fills the values" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", 
"prompt_number": 95, "text": [ "0 blue\n", "1 blue\n", "2 purple\n", "3 purple\n", "4 yellow\n", "5 yellow\n", "dtype: object" ] } ], "prompt_number": 95 }, { "cell_type": "markdown", "metadata": {}, "source": [ "available method options: ffill or pad, bfill or backfill" ] }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "reindexing columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.arange(9).reshape(3,3), index = ['a','c','d'],\n", " columns=['Ohio','Texas','California'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 96 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioTexasCalifornia
a 0 1 2
c 3 4 5
d 6 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 97, "text": [ " Ohio Texas California\n", "a 0 1 2\n", "c 3 4 5\n", "d 6 7 8" ] } ], "prompt_number": 97 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2 = frame.reindex(['a','b','c','d'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 98 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioTexasCalifornia
a 0 1 2
bNaNNaNNaN
c 3 4 5
d 6 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 99, "text": [ " Ohio Texas California\n", "a 0 1 2\n", "b NaN NaN NaN\n", "c 3 4 5\n", "d 6 7 8" ] } ], "prompt_number": 99 }, { "cell_type": "code", "collapsed": false, "input": [ "states = ['Texas','Utah','California']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 100 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.reindex(columns=states)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TexasUtahCalifornia
a 1NaN 2
c 4NaN 5
d 7NaN 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 101, "text": [ " Texas Utah California\n", "a 1 NaN 2\n", "c 4 NaN 5\n", "d 7 NaN 8" ] } ], "prompt_number": 101 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Both columns and rows can be reindexed" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.reindex(index=['a','b','c','d'], method='ffill',\n", " columns=states)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TexasUtahCalifornia
a 1NaN 2
b 1NaN 2
c 4NaN 5
d 7NaN 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 102, "text": [ " Texas Utah California\n", "a 1 NaN 2\n", "b 1 NaN 2\n", "c 4 NaN 5\n", "d 7 NaN 8" ] } ], "prompt_number": 102 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioTexasCalifornia
a 0 1 2
c 3 4 5
d 6 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 103, "text": [ " Ohio Texas California\n", "a 0 1 2\n", "c 3 4 5\n", "d 6 7 8" ] } ], "prompt_number": 103 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "reindexing with ix" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.ix[['a','b','c','d'], states]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TexasUtahCalifornia
a 1NaN 2
bNaNNaNNaN
c 4NaN 5
d 7NaN 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 105, "text": [ " Texas Utah California\n", "a 1 NaN 2\n", "b NaN NaN NaN\n", "c 4 NaN 5\n", "d 7 NaN 8" ] } ], "prompt_number": 105 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Dropping entries from an axis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(np.arange(5.), index = ['a','b','c','d','e'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 106 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 107, "text": [ "a 0\n", "b 1\n", "c 2\n", "d 3\n", "e 4\n", "dtype: float64" ] } ], "prompt_number": 107 }, { "cell_type": "code", "collapsed": false, "input": [ "new_obj = obj.drop('c')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 109 }, { "cell_type": "code", "collapsed": false, "input": [ "new_obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 110, "text": [ "a 0\n", "b 1\n", "d 3\n", "e 4\n", "dtype: float64" ] } ], "prompt_number": 110 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.drop(['d','c'])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 111, "text": [ "a 0\n", "b 1\n", "e 4\n", "dtype: float64" ] } ], "prompt_number": 111 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Index values can be deleted from either axis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame(np.arange(16).reshape((4,4)),\n", " index = ['Ohio','Colorado','Utah','New York'],\n", " columns = ['one','two','three','four'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 112 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, 
"outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 113, "text": [ " one two three four\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "Utah 8 9 10 11\n", "New York 12 13 14 15" ] } ], "prompt_number": 113 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop(['Colorado','Ohio'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Utah 8 9 10 11
New York 12 13 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 114, "text": [ " one two three four\n", "Utah 8 9 10 11\n", "New York 12 13 14 15" ] } ], "prompt_number": 114 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop('two', axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onethreefour
Ohio 0 2 3
Colorado 4 6 7
Utah 8 10 11
New York 12 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 115, "text": [ " one three four\n", "Ohio 0 2 3\n", "Colorado 4 6 7\n", "Utah 8 10 11\n", "New York 12 14 15" ] } ], "prompt_number": 115 }, { "cell_type": "code", "collapsed": false, "input": [ "data.drop(['two','four'], axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onethree
Ohio 0 2
Colorado 4 6
Utah 8 10
New York 12 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 116, "text": [ " one three\n", "Ohio 0 2\n", "Colorado 4 6\n", "Utah 8 10\n", "New York 12 14" ] } ], "prompt_number": 116 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Indexing, selection and filtering" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(np.arange(4.), index=['a','b','c','d'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 117 }, { "cell_type": "code", "collapsed": false, "input": [ "obj['b']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 118, "text": [ "1.0" ] } ], "prompt_number": 118 }, { "cell_type": "code", "collapsed": false, "input": [ "obj[2:4]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 119, "text": [ "c 2\n", "d 3\n", "dtype: float64" ] } ], "prompt_number": 119 }, { "cell_type": "code", "collapsed": false, "input": [ "obj[['b','a','d']]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 120, "text": [ "b 1\n", "a 0\n", "d 3\n", "dtype: float64" ] } ], "prompt_number": 120 }, { "cell_type": "code", "collapsed": false, "input": [ "obj[[1,3]]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 121, "text": [ "b 1\n", "d 3\n", "dtype: float64" ] } ], "prompt_number": 121 }, { "cell_type": "code", "collapsed": false, "input": [ "obj[obj<2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 122, "text": [ "a 0\n", "b 1\n", "dtype: float64" ] } ], "prompt_number": 122 }, { "cell_type": "code", "collapsed": false, "input": [ "obj['b':'c']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 123, "text": [ "b 1\n", "c 2\n", "dtype: 
float64" ] } ], "prompt_number": 123 }, { "cell_type": "code", "collapsed": false, "input": [ "obj['b':'c']=5" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 124 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 125, "text": [ "a 0\n", "b 5\n", "c 5\n", "d 3\n", "dtype: float64" ] } ], "prompt_number": 125 }, { "cell_type": "code", "collapsed": false, "input": [ "data=DataFrame(np.arange(16).reshape((4,4)),\n", " index=['Ohio','Colorado','Utah','New York'],\n", " columns=['one','two','three','four'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 126 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio 0 1 2 3
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 127, "text": [ " one two three four\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7\n", "Utah 8 9 10 11\n", "New York 12 13 14 15" ] } ], "prompt_number": 127 }, { "cell_type": "code", "collapsed": false, "input": [ "data['two']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 128, "text": [ "Ohio 1\n", "Colorado 5\n", "Utah 9\n", "New York 13\n", "Name: two, dtype: int64" ] } ], "prompt_number": 128 }, { "cell_type": "code", "collapsed": false, "input": [ "data[['three','one']]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
threeone
Ohio 2 0
Colorado 6 4
Utah 10 8
New York 14 12
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 129, "text": [ " three one\n", "Ohio 2 0\n", "Colorado 6 4\n", "Utah 10 8\n", "New York 14 12" ] } ], "prompt_number": 129 }, { "cell_type": "code", "collapsed": false, "input": [ "data[:2]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio 0 1 2 3
Colorado 4 5 6 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 130, "text": [ " one two three four\n", "Ohio 0 1 2 3\n", "Colorado 4 5 6 7" ] } ], "prompt_number": 130 }, { "cell_type": "code", "collapsed": false, "input": [ "data[data['three']>5]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Colorado 4 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 131, "text": [ " one two three four\n", "Colorado 4 5 6 7\n", "Utah 8 9 10 11\n", "New York 12 13 14 15" ] } ], "prompt_number": 131 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "indexing with boolean DataFrame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data<5" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio True True True True
Colorado True False False False
Utah False False False False
New York False False False False
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 132, "text": [ " one two three four\n", "Ohio True True True True\n", "Colorado True False False False\n", "Utah False False False False\n", "New York False False False False" ] } ], "prompt_number": 132 }, { "cell_type": "code", "collapsed": false, "input": [ "data[data<5]=0" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 133 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothreefour
Ohio 0 0 0 0
Colorado 0 5 6 7
Utah 8 9 10 11
New York 12 13 14 15
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 134, "text": [ " one two three four\n", "Ohio 0 0 0 0\n", "Colorado 0 5 6 7\n", "Utah 8 9 10 11\n", "New York 12 13 14 15" ] } ], "prompt_number": 134 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "label-indexing on the rows (this is a less verbose way to do reindexing)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix['Colorado',['two','three']]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 135, "text": [ "two 5\n", "three 6\n", "Name: Colorado, dtype: int64" ] } ], "prompt_number": 135 }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix[['Colorado','Utah'],[3,0,1]]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
fouronetwo
Colorado 7 0 5
Utah 11 8 9
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 136, "text": [ " four one two\n", "Colorado 7 0 5\n", "Utah 11 8 9" ] } ], "prompt_number": 136 }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 137, "text": [ "one 8\n", "two 9\n", "three 10\n", "four 11\n", "Name: Utah, dtype: int64" ] } ], "prompt_number": 137 }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix[:'Utah','two']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 138, "text": [ "Ohio 0\n", "Colorado 5\n", "Utah 9\n", "Name: two, dtype: int64" ] } ], "prompt_number": 138 }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix[data.three>5,:3]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwothree
Colorado 0 5 6
Utah 8 9 10
New York 12 13 14
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 140, "text": [ " one two three\n", "Colorado 0 5 6\n", "Utah 8 9 10\n", "New York 12 13 14" ] } ], "prompt_number": 140 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Arithmetic and data alignment" ] }, { "cell_type": "code", "collapsed": false, "input": [ "s1 = Series([7.3,-2.5,3.4,1.5], index=['a','c','d','e'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 141 }, { "cell_type": "code", "collapsed": false, "input": [ "s2 = Series([-2.1,3.6,-1.5,4,3.1], index=['a','c','e','f','g'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 142 }, { "cell_type": "code", "collapsed": false, "input": [ "s1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 143, "text": [ "a 7.3\n", "c -2.5\n", "d 3.4\n", "e 1.5\n", "dtype: float64" ] } ], "prompt_number": 143 }, { "cell_type": "code", "collapsed": false, "input": [ "s2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 144, "text": [ "a -2.1\n", "c 3.6\n", "e -1.5\n", "f 4.0\n", "g 3.1\n", "dtype: float64" ] } ], "prompt_number": 144 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "adding these together yields" ] }, { "cell_type": "code", "collapsed": false, "input": [ "s1+s2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 145, "text": [ "a 5.2\n", "c 1.1\n", "d NaN\n", "e 0.0\n", "f NaN\n", "g NaN\n", "dtype: float64" ] } ], "prompt_number": 145 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "aligment is performed on both the rows and the columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame(np.arange(9.).reshape((3,3)),\n", " columns = list('bcd'),\n", " index=['Ohio','Texas','Colorado'])" ], "language": "python", "metadata": {}, 
"outputs": [], "prompt_number": 146 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame(np.arange(12.).reshape((4,3)),\n", " columns = list('bde'),\n", " index=['Utah','Ohio','Texas','Oregon'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 150 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bcd
Ohio 0 1 2
Texas 3 4 5
Colorado 6 7 8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 148, "text": [ " b c d\n", "Ohio 0 1 2\n", "Texas 3 4 5\n", "Colorado 6 7 8" ] } ], "prompt_number": 148 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah 0 1 2
Ohio 3 4 5
Texas 6 7 8
Oregon 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 151, "text": [ " b d e\n", "Utah 0 1 2\n", "Ohio 3 4 5\n", "Texas 6 7 8\n", "Oregon 9 10 11" ] } ], "prompt_number": 151 }, { "cell_type": "code", "collapsed": false, "input": [ "df1 + df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bcde
ColoradoNaNNaNNaNNaN
Ohio 3NaN 6NaN
OregonNaNNaNNaNNaN
Texas 9NaN 12NaN
UtahNaNNaNNaNNaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 152, "text": [ " b c d e\n", "Colorado NaN NaN NaN NaN\n", "Ohio 3 NaN 6 NaN\n", "Oregon NaN NaN NaN NaN\n", "Texas 9 NaN 12 NaN\n", "Utah NaN NaN NaN NaN" ] } ], "prompt_number": 152 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Arithmetci operations with fill values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df1 = DataFrame(np.arange(12.).reshape((3,4)), columns = list('abcd'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 153 }, { "cell_type": "code", "collapsed": false, "input": [ "df2 = DataFrame(np.arange(20.).reshape((4,5)), columns = list('abcde'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 154 }, { "cell_type": "code", "collapsed": false, "input": [ "df1" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0 0 1 2 3
1 4 5 6 7
2 8 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 155, "text": [ " a b c d\n", "0 0 1 2 3\n", "1 4 5 6 7\n", "2 8 9 10 11" ] } ], "prompt_number": 155 }, { "cell_type": "code", "collapsed": false, "input": [ "df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
0 0 1 2 3 4
1 5 6 7 8 9
2 10 11 12 13 14
3 15 16 17 18 19
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 156, "text": [ " a b c d e\n", "0 0 1 2 3 4\n", "1 5 6 7 8 9\n", "2 10 11 12 13 14\n", "3 15 16 17 18 19" ] } ], "prompt_number": 156 }, { "cell_type": "code", "collapsed": false, "input": [ "df1 + df2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
0 0 2 4 6NaN
1 9 11 13 15NaN
2 18 20 22 24NaN
3NaNNaNNaNNaNNaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 157, "text": [ " a b c d e\n", "0 0 2 4 6 NaN\n", "1 9 11 13 15 NaN\n", "2 18 20 22 24 NaN\n", "3 NaN NaN NaN NaN NaN" ] } ], "prompt_number": 157 }, { "cell_type": "code", "collapsed": false, "input": [ "df1.add(df2, fill_value=0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
0 0 2 4 6 4
1 9 11 13 15 9
2 18 20 22 24 14
3 15 16 17 18 19
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 158, "text": [ " a b c d e\n", "0 0 2 4 6 4\n", "1 9 11 13 15 9\n", "2 18 20 22 24 14\n", "3 15 16 17 18 19" ] } ], "prompt_number": 158 }, { "cell_type": "code", "collapsed": false, "input": [ "df1.reindex(columns=df2.columns, fill_value=0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcde
0 0 1 2 3 0
1 4 5 6 7 0
2 8 9 10 11 0
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 159, "text": [ " a b c d e\n", "0 0 1 2 3 0\n", "1 4 5 6 7 0\n", "2 8 9 10 11 0" ] } ], "prompt_number": 159 }, { "cell_type": "markdown", "metadata": {}, "source": [ "aritmetic methods: add, sub, div, mul" ] }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Operations between DataFrame and Series" ] }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "broadcasting" ] }, { "cell_type": "code", "collapsed": false, "input": [ "arr = np.arange(12.).reshape((3,4))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 160 }, { "cell_type": "code", "collapsed": false, "input": [ "arr" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 161, "text": [ "array([[ 0., 1., 2., 3.],\n", " [ 4., 5., 6., 7.],\n", " [ 8., 9., 10., 11.]])" ] } ], "prompt_number": 161 }, { "cell_type": "code", "collapsed": false, "input": [ "arr[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 162, "text": [ "array([ 0., 1., 2., 3.])" ] } ], "prompt_number": 162 }, { "cell_type": "code", "collapsed": false, "input": [ "arr-arr[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 163, "text": [ "array([[ 0., 0., 0., 0.],\n", " [ 4., 4., 4., 4.],\n", " [ 8., 8., 8., 8.]])" ] } ], "prompt_number": 163 }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.arange(12.).reshape((4,3)), columns=list('bde'),\n", " index=['Utah','Ohio','Texas','Oregon'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 164 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah 0 1 2
Ohio 3 4 5
Texas 6 7 8
Oregon 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 165, "text": [ " b d e\n", "Utah 0 1 2\n", "Ohio 3 4 5\n", "Texas 6 7 8\n", "Oregon 9 10 11" ] } ], "prompt_number": 165 }, { "cell_type": "code", "collapsed": false, "input": [ "series =frame.ix[0]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 166 }, { "cell_type": "code", "collapsed": false, "input": [ "series" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 167, "text": [ "b 0\n", "d 1\n", "e 2\n", "Name: Utah, dtype: float64" ] } ], "prompt_number": 167 }, { "cell_type": "code", "collapsed": false, "input": [ "frame-series" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah 0 0 0
Ohio 3 3 3
Texas 6 6 6
Oregon 9 9 9
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 168, "text": [ " b d e\n", "Utah 0 0 0\n", "Ohio 3 3 3\n", "Texas 6 6 6\n", "Oregon 9 9 9" ] } ], "prompt_number": 168 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "if an index value is not found" ] }, { "cell_type": "code", "collapsed": false, "input": [ "series2 = Series(range(3), index=['b','e','f'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 169 }, { "cell_type": "code", "collapsed": false, "input": [ "series2" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 170, "text": [ "b 0\n", "e 1\n", "f 2\n", "dtype: int64" ] } ], "prompt_number": 170 }, { "cell_type": "code", "collapsed": false, "input": [ "frame + series2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bdef
Utah 0NaN 3NaN
Ohio 3NaN 6NaN
Texas 6NaN 9NaN
Oregon 9NaN 12NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 172, "text": [ " b d e f\n", "Utah 0 NaN 3 NaN\n", "Ohio 3 NaN 6 NaN\n", "Texas 6 NaN 9 NaN\n", "Oregon 9 NaN 12 NaN" ] } ], "prompt_number": 172 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "broadcast over the columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "series3 = frame['d']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 174 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah 0 1 2
Ohio 3 4 5
Texas 6 7 8
Oregon 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 175, "text": [ " b d e\n", "Utah 0 1 2\n", "Ohio 3 4 5\n", "Texas 6 7 8\n", "Oregon 9 10 11" ] } ], "prompt_number": 175 }, { "cell_type": "code", "collapsed": false, "input": [ "series3" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 176, "text": [ "Utah 1\n", "Ohio 4\n", "Texas 7\n", "Oregon 10\n", "Name: d, dtype: float64" ] } ], "prompt_number": 176 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sub(series3, axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah-1 0 1
Ohio-1 0 1
Texas-1 0 1
Oregon-1 0 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 179, "text": [ " b d e\n", "Utah -1 0 1\n", "Ohio -1 0 1\n", "Texas -1 0 1\n", "Oregon -1 0 1" ] } ], "prompt_number": 179 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Function Application and Mapping" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.random.randn(4,3), columns=list('bde'),\n", " index=['Utah','Ohio','Texas','Oregon'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah-0.267451-1.829889 2.544224
Ohio 1.116375-0.270886 1.086111
Texas 0.390590-0.339125 0.341343
Oregon 1.145794-1.268138-0.925395
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ " b d e\n", "Utah -0.267451 -1.829889 2.544224\n", "Ohio 1.116375 -0.270886 1.086111\n", "Texas 0.390590 -0.339125 0.341343\n", "Oregon 1.145794 -1.268138 -0.925395" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "np.abs(frame)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah 0.267451 1.829889 2.544224
Ohio 1.116375 0.270886 1.086111
Texas 0.390590 0.339125 0.341343
Oregon 1.145794 1.268138 0.925395
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 8, "text": [ " b d e\n", "Utah 0.267451 1.829889 2.544224\n", "Ohio 1.116375 0.270886 1.086111\n", "Texas 0.390590 0.339125 0.341343\n", "Oregon 1.145794 1.268138 0.925395" ] } ], "prompt_number": 8 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "apply method" ] }, { "cell_type": "code", "collapsed": false, "input": [ "f = lambda x: x.max()-x.min()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.apply(f,axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 13, "text": [ "Utah 4.374113\n", "Ohio 1.387261\n", "Texas 0.729715\n", "Oregon 2.413932\n", "dtype: float64" ] } ], "prompt_number": 13 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "function return a series with multiple values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def f(x):\n", " return Series([x.min(),x.max()], index=['min','max'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.apply(f, axis=0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
min-0.267451-1.829889-0.925395
max 1.145794-0.270886 2.544224
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 17, "text": [ " b d e\n", "min -0.267451 -1.829889 -0.925395\n", "max 1.145794 -0.270886 2.544224" ] } ], "prompt_number": 17 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "element-wise python functions" ] }, { "cell_type": "code", "collapsed": false, "input": [ "format = lambda x: '%.2f' %x" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.applymap(format)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
bde
Utah -0.27 -1.83 2.54
Ohio 1.12 -0.27 1.09
Texas 0.39 -0.34 0.34
Oregon 1.15 -1.27 -0.93
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 19, "text": [ " b d e\n", "Utah -0.27 -1.83 2.54\n", "Ohio 1.12 -0.27 1.09\n", "Texas 0.39 -0.34 0.34\n", "Oregon 1.15 -1.27 -0.93" ] } ], "prompt_number": 19 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "map method" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame['e'].map(format)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "Utah 2.54\n", "Ohio 1.09\n", "Texas 0.34\n", "Oregon -0.93\n", "Name: e, dtype: object" ] } ], "prompt_number": 20 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Sorting and ranking" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(range(4), index=['d','a','b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.sort_index()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 22, "text": [ "a 1\n", "b 2\n", "c 3\n", "d 0\n", "dtype: int64" ] } ], "prompt_number": 22 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "sort by index on either axis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.arange(8).reshape((2,4)), index = ['three','one'],\n", " columns = ['d','a','b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sort_index()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dabc
one 4 5 6 7
three 0 1 2 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 24, "text": [ " d a b c\n", "one 4 5 6 7\n", "three 0 1 2 3" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sort_index(axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
three 1 2 3 0
one 5 6 7 4
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ " a b c d\n", "three 1 2 3 0\n", "one 5 6 7 4" ] } ], "prompt_number": 25 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "descending order" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sort_index(axis=1, ascending = False)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dcba
three 0 3 2 1
one 4 7 6 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 26, "text": [ " d c b a\n", "three 0 3 2 1\n", "one 4 7 6 5" ] } ], "prompt_number": 26 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "sort a series by values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series([4,7,-3,2])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.order()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 28, "text": [ "2 -3\n", "3 2\n", "0 4\n", "1 7\n", "dtype: int64" ] } ], "prompt_number": 28 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "missing values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series([4,np.nan,7,np.nan,-3,2])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.order()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ "4 -3\n", "5 2\n", "0 4\n", "2 7\n", "1 NaN\n", "3 NaN\n", "dtype: float64" ] } ], "prompt_number": 30 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "sort values in one or more columns in data frame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame({'b':[4,7,-3,2],'a':[0,1,0,1]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
0 0 4
1 1 7
2 0-3
3 1 2
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 32, "text": [ " a b\n", "0 0 4\n", "1 1 7\n", "2 0 -3\n", "3 1 2" ] } ], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sort_index(by='b')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
2 0-3
3 1 2
0 0 4
1 1 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 33, "text": [ " a b\n", "2 0 -3\n", "3 1 2\n", "0 0 4\n", "1 1 7" ] } ], "prompt_number": 33 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sort_index(by = ['a','b'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
2 0-3
0 0 4
3 1 2
1 1 7
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 34, "text": [ " a b\n", "2 0 -3\n", "0 0 4\n", "3 1 2\n", "1 1 7" ] } ], "prompt_number": 34 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Ranking" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series([7, -5, 7, 4, 2, 0, 4])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.rank()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 36, "text": [ "0 6.5\n", "1 1.0\n", "2 6.5\n", "3 4.5\n", "4 3.0\n", "5 2.0\n", "6 4.5\n", "dtype: float64" ] } ], "prompt_number": 36 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "ranking according to the order they're observed in the data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj.rank(method='first')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 37, "text": [ "0 6\n", "1 1\n", "2 7\n", "3 4\n", "4 3\n", "5 2\n", "6 5\n", "dtype: float64" ] } ], "prompt_number": 37 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.rank(ascending=False, method='max')" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 38, "text": [ "0 2\n", "1 7\n", "2 2\n", "3 4\n", "4 5\n", "5 6\n", "6 4\n", "dtype: float64" ] } ], "prompt_number": 38 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "ranks over the rows or columns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame({'b':[4.3,7,-3,2], 'a':[0,1,0,1],\n", " 'c':[-2,5,8,-2.5]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 39 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0 0 4.3-2.0
1 1 7.0 5.0
2 0-3.0 8.0
3 1 2.0-2.5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 40, "text": [ " a b c\n", "0 0 4.3 -2.0\n", "1 1 7.0 5.0\n", "2 0 -3.0 8.0\n", "3 1 2.0 -2.5" ] } ], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.rank(axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abc
0 2 3 1
1 1 3 2
2 2 1 3
3 2 3 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 41, "text": [ " a b c\n", "0 2 3 1\n", "1 1 3 2\n", "2 2 1 3\n", "3 2 3 1" ] } ], "prompt_number": 41 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Tie-breaking methods with rank" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "average (default), min, max, first" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Axis indexes with duplicate values" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(range(5),index=['a','a','b','b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "obj" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 43, "text": [ "a 0\n", "a 1\n", "b 2\n", "b 3\n", "c 4\n", "dtype: int64" ] } ], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.index.is_unique" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 44, "text": [ "False" ] } ], "prompt_number": 44 }, { "cell_type": "code", "collapsed": false, "input": [ "obj['a']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 45, "text": [ "a 0\n", "a 1\n", "dtype: int64" ] } ], "prompt_number": 45 }, { "cell_type": "code", "collapsed": false, "input": [ "obj['c']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 46, "text": [ "4" ] } ], "prompt_number": 46 }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame(np.random.randn(4,3), index=['a','a','b','b'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 47 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
a 0.654546-0.963798 0.228551
a-1.451196-1.528684 1.567436
b-1.213714-1.556472-0.573577
b-1.838320-2.250823 0.440242
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 48, "text": [ " 0 1 2\n", "a 0.654546 -0.963798 0.228551\n", "a -1.451196 -1.528684 1.567436\n", "b -1.213714 -1.556472 -0.573577\n", "b -1.838320 -2.250823 0.440242" ] } ], "prompt_number": 48 }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix['b']" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
b-1.213714-1.556472-0.573577
b-1.838320-2.250823 0.440242
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 49, "text": [ " 0 1 2\n", "b -1.213714 -1.556472 -0.573577\n", "b -1.838320 -2.250823 0.440242" ] } ], "prompt_number": 49 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Summarizing and Computing Descriptive Statistics" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame([[1.4, np.nan], [7.1,-4.5],\n", " [np.nan, np.nan],[0.75,-1.3]],\n", " index=['a','b','c','d'],\n", " columns=['one','two'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 50 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwo
a 1.40 NaN
b 7.10-4.5
c NaN NaN
d 0.75-1.3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 51, "text": [ " one two\n", "a 1.40 NaN\n", "b 7.10 -4.5\n", "c NaN NaN\n", "d 0.75 -1.3" ] } ], "prompt_number": 51 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "column sums" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.sum()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 52, "text": [ "one 9.25\n", "two -5.80\n", "dtype: float64" ] } ], "prompt_number": 52 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "row sums" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.sum(axis=1) " ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 53, "text": [ "a 1.40\n", "b 2.60\n", "c NaN\n", "d -0.55\n", "dtype: float64" ] } ], "prompt_number": 53 }, { "cell_type": "markdown", "metadata": {}, "source": [ "NA values are excluded unless the entire slice is NA. This can be disables using skipna." ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.mean(axis=1, skipna=False)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 54, "text": [ "a NaN\n", "b 1.300\n", "c NaN\n", "d -0.275\n", "dtype: float64" ] } ], "prompt_number": 54 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "indirect statistics" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.idxmax()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 55, "text": [ "one b\n", "two d\n", "dtype: object" ] } ], "prompt_number": 55 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "accumulations" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.cumsum()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwo
a 1.40 NaN
b 8.50-4.5
c NaN NaN
d 9.25-5.8
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 57, "text": [ " one two\n", "a 1.40 NaN\n", "b 8.50 -4.5\n", "c NaN NaN\n", "d 9.25 -5.8" ] } ], "prompt_number": 57 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "multiple summary statistics" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.describe()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
onetwo
count 3.000000 2.000000
mean 3.083333-2.900000
std 3.493685 2.262742
min 0.750000-4.500000
25% 1.075000-3.700000
50% 1.400000-2.900000
75% 4.250000-2.100000
max 7.100000-1.300000
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 59, "text": [ " one two\n", "count 3.000000 2.000000\n", "mean 3.083333 -2.900000\n", "std 3.493685 2.262742\n", "min 0.750000 -4.500000\n", "25% 1.075000 -3.700000\n", "50% 1.400000 -2.900000\n", "75% 4.250000 -2.100000\n", "max 7.100000 -1.300000" ] } ], "prompt_number": 59 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "non-numeric data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(['a','a','b','c']*4)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 60 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.describe()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 61, "text": [ "count 16\n", "unique 3\n", "top a\n", "freq 8\n", "dtype: object" ] } ], "prompt_number": 61 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Correlation and Covariance" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas.io.data as web" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 62 }, { "cell_type": "code", "collapsed": false, "input": [ "all_data = {}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 63 }, { "cell_type": "code", "collapsed": false, "input": [ "for ticker in ['AAPL','IBM','MSFT','GOOG']:\n", " all_data[ticker] = web.get_data_yahoo(ticker,'04/11/2010','04/14/2014')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 72 }, { "cell_type": "code", "collapsed": false, "input": [ "price = DataFrame({tic: data['Adj Close']\n", " for tic, data in all_data.iteritems()})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 73 }, { "cell_type": "code", "collapsed": false, "input": [ "volume = DataFrame({tic: data['Volume']\n", " for tic, data in all_data.iteritems()})" ], "language": "python", "metadata": {}, "outputs": [], 
"prompt_number": 74 }, { "cell_type": "code", "collapsed": false, "input": [ "returns = price.pct_change()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 75 }, { "cell_type": "code", "collapsed": false, "input": [ "returns.tail()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AAPLGOOGIBMMSFT
Date
2014-04-08-0.000136 0.031125-0.006328 0.000516
2014-04-09 0.013219 0.016652 0.017355 0.016508
2014-04-10-0.012912-0.041107-0.004904-0.027404
2014-04-11-0.007358-0.019133-0.002516-0.003913
2014-04-14 0.003981 0.003619 0.013244-0.000786
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 77, "text": [ " AAPL GOOG IBM MSFT\n", "Date \n", "2014-04-08 -0.000136 0.031125 -0.006328 0.000516\n", "2014-04-09 0.013219 0.016652 0.017355 0.016508\n", "2014-04-10 -0.012912 -0.041107 -0.004904 -0.027404\n", "2014-04-11 -0.007358 -0.019133 -0.002516 -0.003913\n", "2014-04-14 0.003981 0.003619 0.013244 -0.000786" ] } ], "prompt_number": 77 }, { "cell_type": "code", "collapsed": false, "input": [ "returns.MSFT.corr(returns.IBM)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 78, "text": [ "0.50623670894935635" ] } ], "prompt_number": 78 }, { "cell_type": "code", "collapsed": false, "input": [ "returns.MSFT.cov(returns.IBM)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 79, "text": [ "8.8008494273331047e-05" ] } ], "prompt_number": 79 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "full correlation or covariance" ] }, { "cell_type": "code", "collapsed": false, "input": [ "returns.corr()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AAPLGOOGIBMMSFT
AAPL 1.000000 0.737439 0.384410 0.336002
GOOG 0.737439 1.000000 0.329676 0.717087
IBM 0.384410 0.329676 1.000000 0.506237
MSFT 0.336002 0.717087 0.506237 1.000000
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 80, "text": [ " AAPL GOOG IBM MSFT\n", "AAPL 1.000000 0.737439 0.384410 0.336002\n", "GOOG 0.737439 1.000000 0.329676 0.717087\n", "IBM 0.384410 0.329676 1.000000 0.506237\n", "MSFT 0.336002 0.717087 0.506237 1.000000" ] } ], "prompt_number": 80 }, { "cell_type": "code", "collapsed": false, "input": [ "returns.cov()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AAPLGOOGIBMMSFT
AAPL 0.000301 0.000152 0.000080 0.000085
GOOG 0.000152 0.000528 0.000069 0.000265
IBM 0.000080 0.000069 0.000143 0.000088
MSFT 0.000085 0.000265 0.000088 0.000211
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 81, "text": [ " AAPL GOOG IBM MSFT\n", "AAPL 0.000301 0.000152 0.000080 0.000085\n", "GOOG 0.000152 0.000528 0.000069 0.000265\n", "IBM 0.000080 0.000069 0.000143 0.000088\n", "MSFT 0.000085 0.000265 0.000088 0.000211" ] } ], "prompt_number": 81 }, { "cell_type": "code", "collapsed": false, "input": [ "returns.corrwith(returns.IBM)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 82, "text": [ "AAPL 0.384410\n", "GOOG 0.329676\n", "IBM 1.000000\n", "MSFT 0.506237\n", "dtype: float64" ] } ], "prompt_number": 82 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "correlations of matching column names" ] }, { "cell_type": "code", "collapsed": false, "input": [ "returns.corrwith(volume)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 84, "text": [ "AAPL -0.128694\n", "GOOG -0.478992\n", "IBM -0.152372\n", "MSFT -0.102546\n", "dtype: float64" ] } ], "prompt_number": 84 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Unique values, value count, and membership" ] }, { "cell_type": "code", "collapsed": false, "input": [ "obj = Series(['c','a','d','a','a','b','b','c','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 85 }, { "cell_type": "code", "collapsed": false, "input": [ "uniques = obj.unique()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 86 }, { "cell_type": "code", "collapsed": false, "input": [ "uniques" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 87, "text": [ "array(['c', 'a', 'd', 'b'], dtype=object)" ] } ], "prompt_number": 87 }, { "cell_type": "code", "collapsed": false, "input": [ "obj.value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", 
"prompt_number": 89, "text": [ "c 3\n", "a 3\n", "b 2\n", "d 1\n", "dtype: int64" ] } ], "prompt_number": 89 }, { "cell_type": "code", "collapsed": false, "input": [ "pd.value_counts(obj.values, sort=False)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 91, "text": [ "a 3\n", "c 3\n", "b 2\n", "d 1\n", "dtype: int64" ] } ], "prompt_number": 91 }, { "cell_type": "code", "collapsed": false, "input": [ "mask = obj.isin(['b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 93 }, { "cell_type": "code", "collapsed": false, "input": [ "mask" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 95, "text": [ "0 True\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 True\n", "6 True\n", "7 True\n", "8 True\n", "dtype: bool" ] } ], "prompt_number": 95 }, { "cell_type": "code", "collapsed": false, "input": [ "obj[mask]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 96, "text": [ "0 c\n", "5 b\n", "6 b\n", "7 c\n", "8 c\n", "dtype: object" ] } ], "prompt_number": 96 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "compute a histogram" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame({'Quo1':[1,3,4,3,4],\n", " 'Quo2': [2,3,1,2,3],\n", " 'Quo3':[1,5,2,4,4]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 97 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Quo1Quo2Quo3
0 1 2 1
1 3 3 5
2 4 1 2
3 3 2 4
4 4 3 4
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 98, "text": [ " Quo1 Quo2 Quo3\n", "0 1 2 1\n", "1 3 3 5\n", "2 4 1 2\n", "3 3 2 4\n", "4 4 3 4" ] } ], "prompt_number": 98 }, { "cell_type": "code", "collapsed": false, "input": [ "result = data.apply(pd.value_counts).fillna(0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 100 }, { "cell_type": "code", "collapsed": false, "input": [ "result" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Quo1Quo2Quo3
1 1 1 1
2 0 2 1
3 2 2 0
4 2 0 2
5 0 0 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 101, "text": [ " Quo1 Quo2 Quo3\n", "1 1 1 1\n", "2 0 2 1\n", "3 2 2 0\n", "4 2 0 2\n", "5 0 0 1" ] } ], "prompt_number": 101 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Handling Missing Data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "string_data = Series(['aardvark','artichoke',np.nan, 'avocado'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 103 }, { "cell_type": "code", "collapsed": false, "input": [ "string_data" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 104, "text": [ "0 aardvark\n", "1 artichoke\n", "2 NaN\n", "3 avocado\n", "dtype: object" ] } ], "prompt_number": 104 }, { "cell_type": "code", "collapsed": false, "input": [ "string_data.isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 105, "text": [ "0 False\n", "1 False\n", "2 True\n", "3 False\n", "dtype: bool" ] } ], "prompt_number": 105 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "built-in Python's none" ] }, { "cell_type": "code", "collapsed": false, "input": [ "string_data[0] = None" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 106 }, { "cell_type": "code", "collapsed": false, "input": [ "string_data.isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 107, "text": [ "0 True\n", "1 False\n", "2 True\n", "3 False\n", "dtype: bool" ] } ], "prompt_number": 107 }, { "cell_type": "markdown", "metadata": {}, "source": [ "NA handling methods: dropna, fillna, isnull, notnull" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "FIltering out missing data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from numpy import nan as NA" ], "language": "python", "metadata": {}, "outputs": [], 
"prompt_number": 109 }, { "cell_type": "code", "collapsed": false, "input": [ "data = Series([1, NA, 3.5, NA, 7])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 110 }, { "cell_type": "code", "collapsed": false, "input": [ "data.dropna()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 111, "text": [ "0 1.0\n", "2 3.5\n", "4 7.0\n", "dtype: float64" ] } ], "prompt_number": 111 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "using boolean indexing" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data[data.notnull()]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 112, "text": [ "0 1.0\n", "2 3.5\n", "4 7.0\n", "dtype: float64" ] } ], "prompt_number": 112 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "dropna with dataframa" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = DataFrame([[1.,6.5,3.],[1.,NA,NA],\n", " [NA,NA,NA],[NA,6.5,3.]])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 113 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0 1 6.5 3
1 1 NaNNaN
2NaN NaNNaN
3NaN 6.5 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 114, "text": [ " 0 1 2\n", "0 1 6.5 3\n", "1 1 NaN NaN\n", "2 NaN NaN NaN\n", "3 NaN 6.5 3" ] } ], "prompt_number": 114 }, { "cell_type": "code", "collapsed": false, "input": [ "cleaned = data.dropna()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 115 }, { "cell_type": "code", "collapsed": false, "input": [ "cleaned" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0 1 6.5 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 116, "text": [ " 0 1 2\n", "0 1 6.5 3" ] } ], "prompt_number": 116 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "drop rows that are all NA" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.dropna(how='all')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0 1 6.5 3
1 1 NaNNaN
3NaN 6.5 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 117, "text": [ " 0 1 2\n", "0 1 6.5 3\n", "1 1 NaN NaN\n", "3 NaN 6.5 3" ] } ], "prompt_number": 117 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "drop columns that are all NA" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data[4]=NA" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 118 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0124
0 1 6.5 3NaN
1 1 NaNNaNNaN
2NaN NaNNaNNaN
3NaN 6.5 3NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 119, "text": [ " 0 1 2 4\n", "0 1 6.5 3 NaN\n", "1 1 NaN NaN NaN\n", "2 NaN NaN NaN NaN\n", "3 NaN 6.5 3 NaN" ] } ], "prompt_number": 119 }, { "cell_type": "code", "collapsed": false, "input": [ "data.dropna(axis=1,how='all')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0 1 6.5 3
1 1 NaNNaN
2NaN NaNNaN
3NaN 6.5 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 120, "text": [ " 0 1 2\n", "0 1 6.5 3\n", "1 1 NaN NaN\n", "2 NaN NaN NaN\n", "3 NaN 6.5 3" ] } ], "prompt_number": 120 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "keep only rows contaiing a certain no of obs" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame(np.random.randn(7,3))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 121 }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix[:4,1]=NA; df.ix[:2,2]=NA" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 122 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.676323 NaN NaN
1-1.088497 NaN NaN
2 0.122962 NaN NaN
3 2.087705 NaN 1.538338
4-1.267822 NaN-0.924860
5-1.596591-1.001367-0.705282
6-0.026753-0.694895-0.386125
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 123, "text": [ " 0 1 2\n", "0 -0.676323 NaN NaN\n", "1 -1.088497 NaN NaN\n", "2 0.122962 NaN NaN\n", "3 2.087705 NaN 1.538338\n", "4 -1.267822 NaN -0.924860\n", "5 -1.596591 -1.001367 -0.705282\n", "6 -0.026753 -0.694895 -0.386125" ] } ], "prompt_number": 123 }, { "cell_type": "code", "collapsed": false, "input": [ "df.dropna(thresh=2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
3 2.087705 NaN 1.538338
4-1.267822 NaN-0.924860
5-1.596591-1.001367-0.705282
6-0.026753-0.694895-0.386125
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 124, "text": [ " 0 1 2\n", "3 2.087705 NaN 1.538338\n", "4 -1.267822 NaN -0.924860\n", "5 -1.596591 -1.001367 -0.705282\n", "6 -0.026753 -0.694895 -0.386125" ] } ], "prompt_number": 124 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Filling in Missing Data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.fillna(0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.676323 0.000000 0.000000
1-1.088497 0.000000 0.000000
2 0.122962 0.000000 0.000000
3 2.087705 0.000000 1.538338
4-1.267822 0.000000-0.924860
5-1.596591-1.001367-0.705282
6-0.026753-0.694895-0.386125
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 125, "text": [ " 0 1 2\n", "0 -0.676323 0.000000 0.000000\n", "1 -1.088497 0.000000 0.000000\n", "2 0.122962 0.000000 0.000000\n", "3 2.087705 0.000000 1.538338\n", "4 -1.267822 0.000000 -0.924860\n", "5 -1.596591 -1.001367 -0.705282\n", "6 -0.026753 -0.694895 -0.386125" ] } ], "prompt_number": 125 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "different fill value for each column" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.fillna({1:0.5,3:-1})" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.676323 0.500000 NaN
1-1.088497 0.500000 NaN
2 0.122962 0.500000 NaN
3 2.087705 0.500000 1.538338
4-1.267822 0.500000-0.924860
5-1.596591-1.001367-0.705282
6-0.026753-0.694895-0.386125
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 126, "text": [ " 0 1 2\n", "0 -0.676323 0.500000 NaN\n", "1 -1.088497 0.500000 NaN\n", "2 0.122962 0.500000 NaN\n", "3 2.087705 0.500000 1.538338\n", "4 -1.267822 0.500000 -0.924860\n", "5 -1.596591 -1.001367 -0.705282\n", "6 -0.026753 -0.694895 -0.386125" ] } ], "prompt_number": 126 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "modify existing project in place" ] }, { "cell_type": "code", "collapsed": false, "input": [ "_ = df.fillna(0, inplace=True)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 127 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.676323 0.000000 0.000000
1-1.088497 0.000000 0.000000
2 0.122962 0.000000 0.000000
3 2.087705 0.000000 1.538338
4-1.267822 0.000000-0.924860
5-1.596591-1.001367-0.705282
6-0.026753-0.694895-0.386125
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 128, "text": [ " 0 1 2\n", "0 -0.676323 0.000000 0.000000\n", "1 -1.088497 0.000000 0.000000\n", "2 0.122962 0.000000 0.000000\n", "3 2.087705 0.000000 1.538338\n", "4 -1.267822 0.000000 -0.924860\n", "5 -1.596591 -1.001367 -0.705282\n", "6 -0.026753 -0.694895 -0.386125" ] } ], "prompt_number": 128 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "interpolation methods" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = DataFrame(np.random.randn(6,3))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 129 }, { "cell_type": "code", "collapsed": false, "input": [ "df.ix[2:,1]=NA; df.ix[4:,2]=NA" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 130 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.570090-0.513487-0.707382
1 1.453435 0.440059-0.292707
2-0.045053 NaN 0.951857
3 1.277519 NaN-0.463472
4-2.012467 NaN NaN
5-0.854318 NaN NaN
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 131, "text": [ " 0 1 2\n", "0 -0.570090 -0.513487 -0.707382\n", "1 1.453435 0.440059 -0.292707\n", "2 -0.045053 NaN 0.951857\n", "3 1.277519 NaN -0.463472\n", "4 -2.012467 NaN NaN\n", "5 -0.854318 NaN NaN" ] } ], "prompt_number": 131 }, { "cell_type": "code", "collapsed": false, "input": [ "df.fillna(method = 'ffill')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.570090-0.513487-0.707382
1 1.453435 0.440059-0.292707
2-0.045053 0.440059 0.951857
3 1.277519 0.440059-0.463472
4-2.012467 0.440059-0.463472
5-0.854318 0.440059-0.463472
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 132, "text": [ " 0 1 2\n", "0 -0.570090 -0.513487 -0.707382\n", "1 1.453435 0.440059 -0.292707\n", "2 -0.045053 0.440059 0.951857\n", "3 1.277519 0.440059 -0.463472\n", "4 -2.012467 0.440059 -0.463472\n", "5 -0.854318 0.440059 -0.463472" ] } ], "prompt_number": 132 }, { "cell_type": "code", "collapsed": false, "input": [ "df.fillna(method='ffill',limit=2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
0-0.570090-0.513487-0.707382
1 1.453435 0.440059-0.292707
2-0.045053 0.440059 0.951857
3 1.277519 0.440059-0.463472
4-2.012467 NaN-0.463472
5-0.854318 NaN-0.463472
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 133, "text": [ " 0 1 2\n", "0 -0.570090 -0.513487 -0.707382\n", "1 1.453435 0.440059 -0.292707\n", "2 -0.045053 0.440059 0.951857\n", "3 1.277519 0.440059 -0.463472\n", "4 -2.012467 NaN -0.463472\n", "5 -0.854318 NaN -0.463472" ] } ], "prompt_number": 133 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "pass the mean or median value of a Series" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data=Series([1.,NA,3.5,NA,7])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 134 }, { "cell_type": "code", "collapsed": false, "input": [ "data.fillna(data.mean())" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 135, "text": [ "0 1.000000\n", "1 3.833333\n", "2 3.500000\n", "3 3.833333\n", "4 7.000000\n", "dtype: float64" ] } ], "prompt_number": 135 }, { "cell_type": "markdown", "metadata": {}, "source": [ "fillna function arguments : value, method, axis, inplace, limit" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Hierarchical Indexing" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data = Series(np.random.randn(10),\n", " index = [['a','a','a','b','b','b','c','c','d','d'],\n", " [1,2,3,1,2,3,1,2,2,3]])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 136 }, { "cell_type": "code", "collapsed": false, "input": [ "data" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 137, "text": [ "a 1 -0.734465\n", " 2 -0.775564\n", " 3 -2.134157\n", "b 1 1.169687\n", " 2 0.655510\n", " 3 0.229640\n", "c 1 0.607370\n", " 2 -0.227422\n", "d 2 -0.319022\n", " 3 -1.861461\n", "dtype: float64" ] } ], "prompt_number": 137 }, { "cell_type": "code", "collapsed": false, "input": [ "data.index" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", 
"prompt_number": 138, "text": [ "MultiIndex(levels=[[u'a', u'b', u'c', u'd'], [1, 2, 3]],\n", " labels=[[0, 0, 0, 1, 1, 1, 2, 2, 3, 3], [0, 1, 2, 0, 1, 2, 0, 1, 1, 2]])" ] } ], "prompt_number": 138 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "partial indexing" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data['b']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 139, "text": [ "1 1.169687\n", "2 0.655510\n", "3 0.229640\n", "dtype: float64" ] } ], "prompt_number": 139 }, { "cell_type": "code", "collapsed": false, "input": [ "data['b':'c']" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 140, "text": [ "b 1 1.169687\n", " 2 0.655510\n", " 3 0.229640\n", "c 1 0.607370\n", " 2 -0.227422\n", "dtype: float64" ] } ], "prompt_number": 140 }, { "cell_type": "code", "collapsed": false, "input": [ "data.ix[['b','d']]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 141, "text": [ "b 1 1.169687\n", " 2 0.655510\n", " 3 0.229640\n", "d 2 -0.319022\n", " 3 -1.861461\n", "dtype: float64" ] } ], "prompt_number": 141 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "selection from an inner level" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data[:,2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 142, "text": [ "a -0.775564\n", "b 0.655510\n", "c -0.227422\n", "d -0.319022\n", "dtype: float64" ] } ], "prompt_number": 142 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "rearranged into a DataFrame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.unstack()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
123
a-0.734465-0.775564-2.134157
b 1.169687 0.655510 0.229640
c 0.607370-0.227422 NaN
d NaN-0.319022-1.861461
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 143, "text": [ " 1 2 3\n", "a -0.734465 -0.775564 -2.134157\n", "b 1.169687 0.655510 0.229640\n", "c 0.607370 -0.227422 NaN\n", "d NaN -0.319022 -1.861461" ] } ], "prompt_number": 143 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "inverse operation of unstack" ] }, { "cell_type": "code", "collapsed": false, "input": [ "data.unstack().stack()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 144, "text": [ "a 1 -0.734465\n", " 2 -0.775564\n", " 3 -2.134157\n", "b 1 1.169687\n", " 2 0.655510\n", " 3 0.229640\n", "c 1 0.607370\n", " 2 -0.227422\n", "d 2 -0.319022\n", " 3 -1.861461\n", "dtype: float64" ] } ], "prompt_number": 144 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "either axis can have a hierarchical index:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.arange(12).reshape((4,3)),\n", " index = [['a','a','b','b'],[1,2,1,2]],\n", " columns = [['Ohio','Ohio','Colorado'],\n", " ['Green','Red','Green']])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 145 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OhioColorado
GreenRedGreen
a1 0 1 2
2 3 4 5
b1 6 7 8
2 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 146, "text": [ " Ohio Colorado\n", " Green Red Green\n", "a 1 0 1 2\n", " 2 3 4 5\n", "b 1 6 7 8\n", " 2 9 10 11" ] } ], "prompt_number": 146 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "hierarchical levels can have names" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.index.names = ['key1', 'key2']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 147 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.columns.names = ['state', 'color']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 149 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key1key2
a1 0 1 2
2 3 4 5
b1 6 7 8
2 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 150, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key1 key2 \n", "a 1 0 1 2\n", " 2 3 4 5\n", "b 1 6 7 8\n", " 2 9 10 11" ] } ], "prompt_number": 150 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "partial column indexing" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame['Ohio']" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
colorGreenRed
key1key2
a1 0 1
2 3 4
b1 6 7
2 9 10
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 151, "text": [ "color Green Red\n", "key1 key2 \n", "a 1 0 1\n", " 2 3 4\n", "b 1 6 7\n", " 2 9 10" ] } ], "prompt_number": 151 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Reordering and Sorting Levels" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.swaplevel('key1','key2')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key2key1
1a 0 1 2
2a 3 4 5
1b 6 7 8
2b 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 152, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key2 key1 \n", "1 a 0 1 2\n", "2 a 3 4 5\n", "1 b 6 7 8\n", "2 b 9 10 11" ] } ], "prompt_number": 152 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sortlevel(1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key1key2
a1 0 1 2
b1 6 7 8
a2 3 4 5
b2 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 153, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key1 key2 \n", "a 1 0 1 2\n", "b 1 6 7 8\n", "a 2 3 4 5\n", "b 2 9 10 11" ] } ], "prompt_number": 153 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sortlevel(0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key1key2
a1 0 1 2
2 3 4 5
b1 6 7 8
2 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 154, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key1 key2 \n", "a 1 0 1 2\n", " 2 3 4 5\n", "b 1 6 7 8\n", " 2 9 10 11" ] } ], "prompt_number": 154 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.swaplevel(0,1).sortlevel(0)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key2key1
1a 0 1 2
b 6 7 8
2a 3 4 5
b 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 155, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key2 key1 \n", "1 a 0 1 2\n", " b 6 7 8\n", "2 a 3 4 5\n", " b 9 10 11" ] } ], "prompt_number": 155 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Summary Statistics by level" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key1key2
a1 0 1 2
2 3 4 5
b1 6 7 8
2 9 10 11
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 156, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key1 key2 \n", "a 1 0 1 2\n", " 2 3 4 5\n", "b 1 6 7 8\n", " 2 9 10 11" ] } ], "prompt_number": 156 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sum(level='key2')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
stateOhioColorado
colorGreenRedGreen
key2
1 6 8 10
2 12 14 16
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 157, "text": [ "state Ohio Colorado\n", "color Green Red Green\n", "key2 \n", "1 6 8 10\n", "2 12 14 16" ] } ], "prompt_number": 157 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.sum(level='color',axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
colorGreenRed
key1key2
a1 2 1
2 8 4
b1 14 7
2 20 10
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 158, "text": [ "color Green Red\n", "key1 key2 \n", "a 1 2 1\n", " 2 8 4\n", "b 1 14 7\n", " 2 20 10" ] } ], "prompt_number": 158 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Using DataFrame's COlumns" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame({'a':range(7), 'b':range(7,0,-1),\n", " 'c':['one','one','one','two','two','two','two'],\n", " 'd':[0,1,2,0,1,2,3]})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 161 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
0 0 7 one 0
1 1 6 one 1
2 2 5 one 2
3 3 4 two 0
4 4 3 two 1
5 5 2 two 2
6 6 1 two 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 162, "text": [ " a b c d\n", "0 0 7 one 0\n", "1 1 6 one 1\n", "2 2 5 one 2\n", "3 3 4 two 0\n", "4 4 3 two 1\n", "5 5 2 two 2\n", "6 6 1 two 3" ] } ], "prompt_number": 162 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "new data frame" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame2 = frame.set_index(['c','d'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 163 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ab
cd
one0 0 7
1 1 6
2 2 5
two0 3 4
1 4 3
2 5 2
3 6 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 164, "text": [ " a b\n", "c d \n", "one 0 0 7\n", " 1 1 6\n", " 2 2 5\n", "two 0 3 4\n", " 1 4 3\n", " 2 5 2\n", " 3 6 1" ] } ], "prompt_number": 164 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "you can leave the columns in" ] }, { "cell_type": "code", "collapsed": false, "input": [ "frame.set_index(['c','d'], drop=False)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abcd
cd
one0 0 7 one 0
1 1 6 one 1
2 2 5 one 2
two0 3 4 two 0
1 4 3 two 1
2 5 2 two 2
3 6 1 two 3
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 165, "text": [ " a b c d\n", "c d \n", "one 0 0 7 one 0\n", " 1 1 6 one 1\n", " 2 2 5 one 2\n", "two 0 3 4 two 0\n", " 1 4 3 two 1\n", " 2 5 2 two 2\n", " 3 6 1 two 3" ] } ], "prompt_number": 165 }, { "cell_type": "code", "collapsed": false, "input": [ "frame2.reset_index()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cdab
0 one 0 0 7
1 one 1 1 6
2 one 2 2 5
3 two 0 3 4
4 two 1 4 3
5 two 2 5 2
6 two 3 6 1
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 166, "text": [ " c d a b\n", "0 one 0 0 7\n", "1 one 1 1 6\n", "2 one 2 2 5\n", "3 two 0 3 4\n", "4 two 1 4 3\n", "5 two 2 5 2\n", "6 two 3 6 1" ] } ], "prompt_number": 166 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Other Pandas Topics" ] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Integer indexing " ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser = Series(np.arange(3.))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 167 }, { "cell_type": "code", "collapsed": false, "input": [ "ser[-1]" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "-1", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mser\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 482\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 484\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 485\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 486\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0misscalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/core/index.pyc\u001b[0m in \u001b[0;36mget_value\u001b[0;34m(self, series, key)\u001b[0m\n\u001b[1;32m 1194\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1195\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1196\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1197\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1198\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m'integer'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'boolean'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/index.so\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_value (pandas/index.c:2993)\u001b[0;34m()\u001b[0m\n", 
"\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/index.so\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_value (pandas/index.c:2808)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/index.so\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas/index.c:3534)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/hashtable.so\u001b[0m in \u001b[0;36mpandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:7035)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m/Users/sergulaydore/anaconda/lib/python2.7/site-packages/pandas/hashtable.so\u001b[0m in \u001b[0;36mpandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6976)\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: -1" ] } ], "prompt_number": 168 }, { "cell_type": "code", "collapsed": false, "input": [ "ser" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 169, "text": [ "0 0\n", "1 1\n", "2 2\n", "dtype: float64" ] } ], "prompt_number": 169 }, { "cell_type": "markdown", "metadata": {}, "source": [ "With a non-integer index, `ser2[-1]` is unambiguous and returns the last element." ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser2 = Series(np.arange(3.), index = ['a','b','c'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 170 }, { "cell_type": "code", "collapsed": false, "input": [ "ser2[-1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 171, "text": [ "2.0" ] } ], "prompt_number": 171 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "To keep things consistent, use label-based .loc" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser.loc[:1]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 172, "text": [ "0 0\n", "1 1\n", "dtype: float64" ] } ], "prompt_number": 
172 }, { "cell_type": "heading", "level": 5, "metadata": {}, "source": [ "Reliable position-based indexing with .iloc" ] }, { "cell_type": "code", "collapsed": false, "input": [ "ser3 = Series(range(3), index = [-5, 1,3])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 173 }, { "cell_type": "code", "collapsed": false, "input": [ "ser3.iloc[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 174, "text": [ "2" ] } ], "prompt_number": 174 }, { "cell_type": "code", "collapsed": false, "input": [ "frame = DataFrame(np.arange(6).reshape(3,2), index=[2,0,1])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 175 }, { "cell_type": "code", "collapsed": false, "input": [ "frame" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
2 0 1
0 2 3
1 4 5
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 176, "text": [ " 0 1\n", "2 0 1\n", "0 2 3\n", "1 4 5" ] } ], "prompt_number": 176 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.iloc[0]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 177, "text": [ "0 0\n", "1 1\n", "Name: 2, dtype: int64" ] } ], "prompt_number": 177 }, { "cell_type": "code", "collapsed": false, "input": [ "frame.iloc[2]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 179, "text": [ "0 4\n", "1 5\n", "Name: 1, dtype: int64" ] } ], "prompt_number": 179 } ], "metadata": {} } ] }