{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Examining the Data" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "pd.options.display.max_rows = 6\n", "pd.options.display.max_columns = 8\n", "pd.options.display.width = 100" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
07.02511287Bell's Cherry Stout...blaheath4.5Batch 8144\\tPitch black in color with a 1/2 f...2009-10-05 21:31:48
15.7197369790Duck-Rabbit Porter...GJ404.0Sampled from a 12oz bottle in a standard pint...2009-10-05 21:32:09
24.8110983182Fürstenberg Premium Pilsener...biegaman3.5Haystack yellow with an energetic group of bu...2009-10-05 21:32:13
..............................
499978.1219502372Terrapin Coffee Oatmeal Imperial Stout...ugaterrapin4.5Poured a light sucking crude oil beckoning bl...2009-12-25 17:23:52
499984.654531306Badger Original Ale...MrHurmateeowish3.5500ml brown bottle, 4.0% ABV. Pours a crystal...2009-12-25 17:25:06
499999.44769514879Barrel Aged B.O.R.I.S. Oatmeal Imperial Stout...strictly4DK4.522 oz bottle poured into a flute glass, share...2009-12-25 17:26:06
\n", "

50000 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... \\\n", "0 7.0 2511 287 Bell's Cherry Stout ... \n", "1 5.7 19736 9790 Duck-Rabbit Porter ... \n", "2 4.8 11098 3182 Fürstenberg Premium Pilsener ... \n", "... ... ... ... ... ... \n", "49997 8.1 21950 2372 Terrapin Coffee Oatmeal Imperial Stout ... \n", "49998 4.6 5453 1306 Badger Original Ale ... \n", "49999 9.4 47695 14879 Barrel Aged B.O.R.I.S. Oatmeal Imperial Stout ... \n", "\n", " profile_name review_taste text \\\n", "0 blaheath 4.5 Batch 8144\\tPitch black in color with a 1/2 f... \n", "1 GJ40 4.0 Sampled from a 12oz bottle in a standard pint... \n", "2 biegaman 3.5 Haystack yellow with an energetic group of bu... \n", "... ... ... ... \n", "49997 ugaterrapin 4.5 Poured a light sucking crude oil beckoning bl... \n", "49998 MrHurmateeowish 3.5 500ml brown bottle, 4.0% ABV. Pours a crystal... \n", "49999 strictly4DK 4.5 22 oz bottle poured into a flute glass, share... \n", "\n", " time \n", "0 2009-10-05 21:31:48 \n", "1 2009-10-05 21:32:09 \n", "2 2009-10-05 21:32:13 \n", "... ... \n", "49997 2009-12-25 17:23:52 \n", "49998 2009-12-25 17:25:06 \n", "49999 2009-12-25 17:26:06 \n", "\n", "[50000 rows x 13 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_hdf('data/beer.hdf','df')\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 50000 entries, 0 to 49999\n", "Data columns (total 13 columns):\n", "abv 48389 non-null float64\n", "beer_id 50000 non-null int64\n", "brewer_id 50000 non-null int64\n", "beer_name 50000 non-null object\n", "beer_style 50000 non-null object\n", "review_appearance 50000 non-null float64\n", "review_aroma 50000 non-null float64\n", "review_overall 50000 non-null float64\n", "review_palate 50000 non-null float64\n", "profile_name 50000 non-null object\n", "review_taste 50000 non-null float64\n", "text 50000 non-null object\n", "time 50000 non-null datetime64[ns]\n", "dtypes: datetime64[ns](1), float64(6), int64(2), object(4)\n", "memory usage: 5.3+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Text Data & .str accessor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://pandas.pydata.org/pandas-docs/stable/text.html" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "0 American Stout\n", "1 American Porter\n", "2 German Pilsener\n", " ... \n", "49997 American Double / Imperial Stout\n", "49998 English Pale Ale\n", "49999 Russian Imperial Stout\n", "Name: beer_style, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "0 14\n", "1 15\n", "2 15\n", " ..\n", "49997 32\n", "49998 16\n", "49999 22\n", "Name: beer_style, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.str.len()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "0 True\n", "1 True\n", "2 False\n", " ... \n", "49997 True\n", "49998 False\n", "49999 False\n", "Name: beer_style, dtype: bool" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.str.contains('[A|a]merican')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Datetime Data & .dt accessor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-date-components" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "0 2009-10-05 21:31:48\n", "1 2009-10-05 21:32:09\n", "2 2009-10-05 21:32:13\n", " ... \n", "49997 2009-12-25 17:23:52\n", "49998 2009-12-25 17:25:06\n", "49999 2009-12-25 17:26:06\n", "Name: time, dtype: datetime64[ns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.time" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "0 2009-10-05\n", "1 2009-10-05\n", "2 2009-10-05\n", " ... \n", "49997 2009-12-25\n", "49998 2009-12-25\n", "49999 2009-12-25\n", "Name: time, dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.time.dt.date" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "0 21\n", "1 21\n", "2 21\n", " ..\n", "49997 17\n", "49998 17\n", "49999 17\n", "Name: time, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.time.dt.hour" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Categoricals & .cat accessor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://pandas.pydata.org/pandas-docs/stable/categorical.html" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_namebeer_styleprofile_nametext
count50000500005000050000
unique8762104412449978
topSierra Nevada Celebration AleAmerican IPAdrabmuhnan
freq24043242429
\n", "
" ], "text/plain": [ " beer_name beer_style profile_name text\n", "count 50000 50000 50000 50000\n", "unique 8762 104 4124 49978\n", "top Sierra Nevada Celebration Ale American IPA drabmuh nan\n", "freq 240 4324 242 9" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.select_dtypes(include=['object']).describe()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 50000 entries, 0 to 49999\n", "Data columns (total 1 columns):\n", "beer_style 50000 non-null object\n", "dtypes: object(1)\n", "memory usage: 781.2+ KB\n" ] } ], "source": [ "df[['beer_style']].info()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [], "source": [ "df['beer_style'] = df['beer_style'].astype('category')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Int64Index: 50000 entries, 0 to 49999\n", "Data columns (total 1 columns):\n", "beer_style 50000 non-null category\n", "dtypes: category(1)\n", "memory usage: 440.3 KB\n" ] } ], "source": [ "df[['beer_style']].info()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "0 18\n", "1 17\n", "2 61\n", " ..\n", "49997 11\n", "49998 47\n", "49999 89\n", "dtype: int8" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.cat.codes" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "Index(['Altbier', 'American Adjunct Lager', 'American Amber / Red Ale',\n", " 'American Amber / Red Lager', 'American Barleywine', 'American Black Ale',\n", " 'American Blonde Ale', 'American Brown Ale', 'American Dark Wheat Ale',\n", " 'American Double / Imperial IPA',\n", " ...\n", " 'Scotch Ale / Wee Heavy', 'Scottish Ale', 'Scottish Gruit / Ancient Herbed Ale',\n", " 'Smoked Beer', 'Tripel', 'Vienna Lager', 'Weizenbock', 'Wheatwine', 'Winter Warmer',\n", " 'Witbier'],\n", " dtype='object', length=104)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.cat.categories" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.cat.ordered" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Indexing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "http://pandas.pydata.org/pandas-docs/stable/indexing.html" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Boolean indexing\n", "\n", "Like a where clause in SQL. The indexer (or boolean mask) should be 1-dimensional and the same length as the thing being indexed." ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 True\n", " ... \n", "49997 False\n", "49998 True\n", "49999 False\n", "Name: abv, dtype: bool" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.abv < 5" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
24.8110983182Fürstenberg Premium Pilsener...biegaman3.5Haystack yellow with an energetic group of bu...2009-10-05 21:32:13
74.81669256Great White...n0rc414.5Ok, for starters great white I believe will b...2009-10-05 21:34:29
214.6401118Dark Island...abuliarose4.0Poured into a snifter, revealing black opaque...2009-10-05 21:47:36
..............................
499764.7185529663Ichnusa...gregalobeer2.0When in Sardinia I drink this beer, and altho...2009-12-25 15:43:48
499804.15146429Bud Light Golden Wheat...wolfpack873.5Not quite as good as other wheat beers, but i...2009-12-25 15:57:12
499984.654531306Badger Original Ale...MrHurmateeowish3.5500ml brown bottle, 4.0% ABV. Pours a crystal...2009-12-25 17:25:06
\n", "

5399 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... profile_name \\\n", "2 4.8 11098 3182 Fürstenberg Premium Pilsener ... biegaman \n", "7 4.8 1669 256 Great White ... n0rc41 \n", "21 4.6 401 118 Dark Island ... abuliarose \n", "... ... ... ... ... ... ... \n", "49976 4.7 18552 9663 Ichnusa ... gregalobeer \n", "49980 4.1 51464 29 Bud Light Golden Wheat ... wolfpack87 \n", "49998 4.6 5453 1306 Badger Original Ale ... MrHurmateeowish \n", "\n", " review_taste text time \n", "2 3.5 Haystack yellow with an energetic group of bu... 2009-10-05 21:32:13 \n", "7 4.5 Ok, for starters great white I believe will b... 2009-10-05 21:34:29 \n", "21 4.0 Poured into a snifter, revealing black opaque... 2009-10-05 21:47:36 \n", "... ... ... ... \n", "49976 2.0 When in Sardinia I drink this beer, and altho... 2009-12-25 15:43:48 \n", "49980 3.5 Not quite as good as other wheat beers, but i... 2009-12-25 15:57:12 \n", "49998 3.5 500ml brown bottle, 4.0% ABV. Pours a crystal... 2009-12-25 17:25:06 \n", "\n", "[5399 rows x 13 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.abv < 5]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Notice that we just used `[]` there. We can pass the boolean indexer in to `.loc` as well." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": true, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_stylereview_overall
2German Pilsener3.0
7Witbier4.5
21Scottish Ale3.5
.........
49976Euro Pale Lager3.0
49980Herbed / Spiced Beer4.0
49998English Pale Ale4.0
\n", "

5399 rows × 2 columns

\n", "
" ], "text/plain": [ " beer_style review_overall\n", "2 German Pilsener 3.0\n", "7 Witbier 4.5\n", "21 Scottish Ale 3.5\n", "... ... ...\n", "49976 Euro Pale Lager 3.0\n", "49980 Herbed / Spiced Beer 4.0\n", "49998 English Pale Ale 4.0\n", "\n", "[5399 rows x 2 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[df.abv < 5, ['beer_style', 'review_overall']]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Again, you can get complicated" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
07.02511287Bell's Cherry Stout...blaheath4.5Batch 8144\\tPitch black in color with a 1/2 f...2009-10-05 21:31:48
15.7197369790Duck-Rabbit Porter...GJ404.0Sampled from a 12oz bottle in a standard pint...2009-10-05 21:32:09
24.8110983182Fürstenberg Premium Pilsener...biegaman3.5Haystack yellow with an energetic group of bu...2009-10-05 21:32:13
..............................
499978.1219502372Terrapin Coffee Oatmeal Imperial Stout...ugaterrapin4.5Poured a light sucking crude oil beckoning bl...2009-12-25 17:23:52
499984.654531306Badger Original Ale...MrHurmateeowish3.5500ml brown bottle, 4.0% ABV. Pours a crystal...2009-12-25 17:25:06
499999.44769514879Barrel Aged B.O.R.I.S. Oatmeal Imperial Stout...strictly4DK4.522 oz bottle poured into a flute glass, share...2009-12-25 17:26:06
\n", "

18007 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... \\\n", "0 7.0 2511 287 Bell's Cherry Stout ... \n", "1 5.7 19736 9790 Duck-Rabbit Porter ... \n", "2 4.8 11098 3182 Fürstenberg Premium Pilsener ... \n", "... ... ... ... ... ... \n", "49997 8.1 21950 2372 Terrapin Coffee Oatmeal Imperial Stout ... \n", "49998 4.6 5453 1306 Badger Original Ale ... \n", "49999 9.4 47695 14879 Barrel Aged B.O.R.I.S. Oatmeal Imperial Stout ... \n", "\n", " profile_name review_taste text \\\n", "0 blaheath 4.5 Batch 8144\\tPitch black in color with a 1/2 f... \n", "1 GJ40 4.0 Sampled from a 12oz bottle in a standard pint... \n", "2 biegaman 3.5 Haystack yellow with an energetic group of bu... \n", "... ... ... ... \n", "49997 ugaterrapin 4.5 Poured a light sucking crude oil beckoning bl... \n", "49998 MrHurmateeowish 3.5 500ml brown bottle, 4.0% ABV. Pours a crystal... \n", "49999 strictly4DK 4.5 22 oz bottle poured into a flute glass, share... \n", "\n", " time \n", "0 2009-10-05 21:31:48 \n", "1 2009-10-05 21:32:09 \n", "2 2009-10-05 21:32:13 \n", "... ... \n", "49997 2009-12-25 17:23:52 \n", "49998 2009-12-25 17:25:06 \n", "49999 2009-12-25 17:26:06 \n", "\n", "[18007 rows x 13 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[((df.abv < 5) & (df.time > pd.Timestamp('2009-06'))) | (\n", " df.review_overall >= 4.5)]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# isin\n", "\n", "Select just the rows where the `beer_style` contains IPA. " ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "ename": "AttributeError", "evalue": "'CategoricalAccessor' object has no attribute 'contains'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbeer_style\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcat\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontains\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'IPA'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mAttributeError\u001b[0m: 'CategoricalAccessor' object has no attribute 'contains'" ] } ], "source": [ "df[df.beer_style.cat.contains('IPA')]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
39.5285773818Unearthly (Imperial India Pale Ale)...nick764.0The aroma has pine, wood, citrus, caramel, an...2009-10-05 21:32:37
86.76549140Northern Hemisphere Harvest Wet Hop Ale...david184.0I like all of Sierra Nevada's beers but felt ...2009-10-05 21:34:31
168.0361793818Hoppe (Imperial Extra Pale Ale)...nick763.0The aroma is papery with citrus, yeast, and s...2009-10-05 21:43:23
..............................
499477.0709199Big Eye IPA...billshmeinke4.012oz into my pint glass.\\t\\tA: Golden honey a...2009-12-25 08:48:11
499846.0383883718L'IPA Du Lièvre...hlance4.5I love this beer, seek it out when I am in Mo...2009-12-25 16:25:45
499968.07971863Pliny The Elder...hrking4.0This is a big hoppy monster of an IPA..If you...2009-12-25 17:23:24
\n", "

7774 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... \\\n", "3 9.5 28577 3818 Unearthly (Imperial India Pale Ale) ... \n", "8 6.7 6549 140 Northern Hemisphere Harvest Wet Hop Ale ... \n", "16 8.0 36179 3818 Hoppe (Imperial Extra Pale Ale) ... \n", "... ... ... ... ... ... \n", "49947 7.0 709 199 Big Eye IPA ... \n", "49984 6.0 38388 3718 L'IPA Du Lièvre ... \n", "49996 8.0 7971 863 Pliny The Elder ... \n", "\n", " profile_name review_taste text \\\n", "3 nick76 4.0 The aroma has pine, wood, citrus, caramel, an... \n", "8 david18 4.0 I like all of Sierra Nevada's beers but felt ... \n", "16 nick76 3.0 The aroma is papery with citrus, yeast, and s... \n", "... ... ... ... \n", "49947 billshmeinke 4.0 12oz into my pint glass.\\t\\tA: Golden honey a... \n", "49984 hlance 4.5 I love this beer, seek it out when I am in Mo... \n", "49996 hrking 4.0 This is a big hoppy monster of an IPA..If you... \n", "\n", " time \n", "3 2009-10-05 21:32:37 \n", "8 2009-10-05 21:34:31 \n", "16 2009-10-05 21:43:23 \n", "... ... \n", "49947 2009-12-25 08:48:11 \n", "49984 2009-12-25 16:25:45 \n", "49996 2009-12-25 17:23:24 \n", "\n", "[7774 rows x 13 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df.beer_style.astype(object)).str.contains('IPA')]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Is there another / better way?" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "Index(['American Double / Imperial IPA', 'American IPA', 'Belgian IPA',\n", " 'English India Pale Ale (IPA)'],\n", " dtype='object')" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cats = (df\n", " .beer_style\n", " .cat\n", " .categories[df.beer_style.cat.categories.str.contains('IPA')]\n", " )\n", "cats" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 False\n", "2 False\n", " ... \n", "49997 False\n", "49998 False\n", "49999 False\n", "Name: beer_style, dtype: bool" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.beer_style.isin(cats)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
39.5285773818Unearthly (Imperial India Pale Ale)...nick764.0The aroma has pine, wood, citrus, caramel, an...2009-10-05 21:32:37
86.76549140Northern Hemisphere Harvest Wet Hop Ale...david184.0I like all of Sierra Nevada's beers but felt ...2009-10-05 21:34:31
168.0361793818Hoppe (Imperial Extra Pale Ale)...nick763.0The aroma is papery with citrus, yeast, and s...2009-10-05 21:43:23
..............................
499477.0709199Big Eye IPA...billshmeinke4.012oz into my pint glass.\\t\\tA: Golden honey a...2009-12-25 08:48:11
499846.0383883718L'IPA Du Lièvre...hlance4.5I love this beer, seek it out when I am in Mo...2009-12-25 16:25:45
499968.07971863Pliny The Elder...hrking4.0This is a big hoppy monster of an IPA..If you...2009-12-25 17:23:24
\n", "

7774 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... \\\n", "3 9.5 28577 3818 Unearthly (Imperial India Pale Ale) ... \n", "8 6.7 6549 140 Northern Hemisphere Harvest Wet Hop Ale ... \n", "16 8.0 36179 3818 Hoppe (Imperial Extra Pale Ale) ... \n", "... ... ... ... ... ... \n", "49947 7.0 709 199 Big Eye IPA ... \n", "49984 6.0 38388 3718 L'IPA Du Lièvre ... \n", "49996 8.0 7971 863 Pliny The Elder ... \n", "\n", " profile_name review_taste text \\\n", "3 nick76 4.0 The aroma has pine, wood, citrus, caramel, an... \n", "8 david18 4.0 I like all of Sierra Nevada's beers but felt ... \n", "16 nick76 3.0 The aroma is papery with citrus, yeast, and s... \n", "... ... ... ... \n", "49947 billshmeinke 4.0 12oz into my pint glass.\\t\\tA: Golden honey a... \n", "49984 hlance 4.5 I love this beer, seek it out when I am in Mo... \n", "49996 hrking 4.0 This is a big hoppy monster of an IPA..If you... \n", "\n", " time \n", "3 2009-10-05 21:32:37 \n", "8 2009-10-05 21:34:31 \n", "16 2009-10-05 21:43:23 \n", "... ... \n", "49947 2009-12-25 08:48:11 \n", "49984 2009-12-25 16:25:45 \n", "49996 2009-12-25 17:23:24 \n", "\n", "[7774 rows x 13 columns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.beer_style.isin(cats)]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "This is quite powerful. Any method that returns a boolean array is potentially an indexer." ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "1904 240\n", "53863 208\n", "52441 158\n", " ... \n", "41285 1\n", "47430 1\n", "53274 1\n", "Name: beer_id, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "beer_ids = df.beer_id.value_counts()\n", "beer_ids" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
1428.652441147Stone 09.09.09 Vertical Epic Ale...aubuc14.5Poured in to a chimay goblet.\\t\\tPours black ...2009-10-06 00:10:46
4468.652441147Stone 09.09.09 Vertical Epic Ale...corby1123.5Pours pitch black and completely opaque witho...2009-10-06 07:59:01
7148.652441147Stone 09.09.09 Vertical Epic Ale...alcstradamus3.5Near black pour with a mountainous 3 finger h...2009-10-06 21:31:06
..............................
497156.81904140Sierra Nevada Celebration Ale...betterbgood2.5I've tried this beer on tap about a year ago,...2009-12-25 01:13:38
498448.652441147Stone 09.09.09 Vertical Epic Ale...russpowell4.0thanks to colonelforbin for this one!\\t\\tPour...2009-12-25 04:10:49
498635.55386328Our Special Ale 2009 (Anchor Christmas Ale)...FickleBeast4.012 oz bottle poured into a pint glass \\t\\tPou...2009-12-25 04:38:07
\n", "

606 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... \\\n", "142 8.6 52441 147 Stone 09.09.09 Vertical Epic Ale ... \n", "446 8.6 52441 147 Stone 09.09.09 Vertical Epic Ale ... \n", "714 8.6 52441 147 Stone 09.09.09 Vertical Epic Ale ... \n", "... ... ... ... ... ... \n", "49715 6.8 1904 140 Sierra Nevada Celebration Ale ... \n", "49844 8.6 52441 147 Stone 09.09.09 Vertical Epic Ale ... \n", "49863 5.5 53863 28 Our Special Ale 2009 (Anchor Christmas Ale) ... \n", "\n", " profile_name review_taste text \\\n", "142 aubuc1 4.5 Poured in to a chimay goblet.\\t\\tPours black ... \n", "446 corby112 3.5 Pours pitch black and completely opaque witho... \n", "714 alcstradamus 3.5 Near black pour with a mountainous 3 finger h... \n", "... ... ... ... \n", "49715 betterbgood 2.5 I've tried this beer on tap about a year ago,... \n", "49844 russpowell 4.0 thanks to colonelforbin for this one!\\t\\tPour... \n", "49863 FickleBeast 4.0 12 oz bottle poured into a pint glass \\t\\tPou... \n", "\n", " time \n", "142 2009-10-06 00:10:46 \n", "446 2009-10-06 07:59:01 \n", "714 2009-10-06 21:31:06 \n", "... ... \n", "49715 2009-12-25 01:13:38 \n", "49844 2009-12-25 04:10:49 \n", "49863 2009-12-25 04:38:07 \n", "\n", "[606 rows x 13 columns]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.beer_id.isin(beer_ids[0:3].index)]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Positional Indexing" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
07.02511287Bell's Cherry Stout...blaheath4.5Batch 8144\\tPitch black in color with a 1/2 f...2009-10-05 21:31:48
15.7197369790Duck-Rabbit Porter...GJ404.0Sampled from a 12oz bottle in a standard pint...2009-10-05 21:32:09
24.8110983182Fürstenberg Premium Pilsener...biegaman3.5Haystack yellow with an energetic group of bu...2009-10-05 21:32:13
39.5285773818Unearthly (Imperial India Pale Ale)...nick764.0The aroma has pine, wood, citrus, caramel, an...2009-10-05 21:32:37
45.8398119Wolaver's Pale Ale...champ1033.0A: Pours a slightly hazy golden/orange color....2009-10-05 21:33:14
\n", "

5 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... profile_name \\\n", "0 7.0 2511 287 Bell's Cherry Stout ... blaheath \n", "1 5.7 19736 9790 Duck-Rabbit Porter ... GJ40 \n", "2 4.8 11098 3182 Fürstenberg Premium Pilsener ... biegaman \n", "3 9.5 28577 3818 Unearthly (Imperial India Pale Ale) ... nick76 \n", "4 5.8 398 119 Wolaver's Pale Ale ... champ103 \n", "\n", " review_taste text time \n", "0 4.5 Batch 8144\\tPitch black in color with a 1/2 f... 2009-10-05 21:31:48 \n", "1 4.0 Sampled from a 12oz bottle in a standard pint... 2009-10-05 21:32:09 \n", "2 3.5 Haystack yellow with an energetic group of bu... 2009-10-05 21:32:13 \n", "3 4.0 The aroma has pine, wood, citrus, caramel, an... 2009-10-05 21:32:37 \n", "4 3.0 A: Pours a slightly hazy golden/orange color.... 2009-10-05 21:33:14 \n", "\n", "[5 rows x 13 columns]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(5)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_idbeer_name...profile_namereview_tastetexttime
24.8110983182Fürstenberg Premium Pilsener...biegaman3.5Haystack yellow with an energetic group of bu...2009-10-05 21:32:13
57.0966365Pike Street XXXXX Stout...sprucetip4.5From notes. Pours black, thin mocha head fade...2009-10-05 21:33:48
1011.843670423Bourbon Barrel Quad (BBQ)...blaheath4.5Burnt amber in color with a 1/4\" head. Aroma ...2009-10-05 21:36:03
\n", "

3 rows × 13 columns

\n", "
" ], "text/plain": [ " abv beer_id brewer_id beer_name ... profile_name \\\n", "2 4.8 11098 3182 Fürstenberg Premium Pilsener ... biegaman \n", "5 7.0 966 365 Pike Street XXXXX Stout ... sprucetip \n", "10 11.8 43670 423 Bourbon Barrel Quad (BBQ) ... blaheath \n", "\n", " review_taste text time \n", "2 3.5 Haystack yellow with an energetic group of bu... 2009-10-05 21:32:13 \n", "5 4.5 From notes. Pours black, thin mocha head fade... 2009-10-05 21:33:48 \n", "10 4.5 Burnt amber in color with a 1/4\" head. Aroma ... 2009-10-05 21:36:03 \n", "\n", "[3 rows x 13 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[[2,5,10]]" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbeer_idbrewer_id
24.8110983182
57.0966365
1011.843670423
\n", "
" ], "text/plain": [ " abv beer_id brewer_id\n", "2 4.8 11098 3182\n", "5 7.0 966 365\n", "10 11.8 43670 423" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.iloc[[2,5,10],0:3]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Location Based Indexing" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_idtime
2110982009-10-05 21:32:13
59662009-10-05 21:33:48
10436702009-10-05 21:36:03
\n", "
" ], "text/plain": [ " beer_id time\n", "2 11098 2009-10-05 21:32:13\n", "5 966 2009-10-05 21:33:48\n", "10 43670 2009-10-05 21:36:03" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[[2,5,10],['beer_id','time']]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_idtime
142524412009-10-06 00:10:46
446524412009-10-06 07:59:01
714524412009-10-06 21:31:06
.........
4971519042009-12-25 01:13:38
49844524412009-12-25 04:10:49
49863538632009-12-25 04:38:07
\n", "

606 rows × 2 columns

\n", "
" ], "text/plain": [ " beer_id time\n", "142 52441 2009-10-06 00:10:46\n", "446 52441 2009-10-06 07:59:01\n", "714 52441 2009-10-06 21:31:06\n", "... ... ...\n", "49715 1904 2009-12-25 01:13:38\n", "49844 52441 2009-12-25 04:10:49\n", "49863 53863 2009-12-25 04:38:07\n", "\n", "[606 rows x 2 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.loc[df.beer_id.isin(beer_ids[0:3].index),['beer_id','time']]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Questions\n", "- why we use ``.loc``\n", "- why do we care about uniqueness\n", "- why do we use ``.ix``" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.index.is_unique" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.set_index('beer_id').index.is_unique" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Hierarchical Indexing\n", "\n", "One of the most powerful and most complicated features of pandas.\n", "Let's you represent high-dimensional datasets in a table." ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbrewer_idbeer_namebeer_style...review_overallreview_palatereview_tastetext
profile_namebeer_idtime
01121987290772009-11-30 03:44:429.011256Corne De BrumeScotch Ale / Wee Heavy...543.5Poured into a belgian beer glass, not great h...
05Harley13072009-10-06 00:10:068.5428Der Weisse BockWeizenbock...444.0Can't find the date on this one.\\t\\tPurchased...
27322009-12-12 01:21:368.0287Bell's Consecrator DoppelbockDoppelbock...444.5Bottle # 8881 (02/09)\\t\\tPurchased through We...
28992009-10-20 22:27:017.1911Andechser Doppelbock DunkelDoppelbock...545.0Bottle # 300310\\t\\tPurchased through Kracked ...
30542009-11-21 02:17:415.0946Piton Lager BeerAmerican Adjunct Lager...323.0Bottled in 2007.\\t\\tPurchased in St. Lucia @ ...
\n", "

5 rows × 10 columns

\n", "
" ], "text/plain": [ " abv brewer_id beer_name \\\n", "profile_name beer_id time \n", "01121987 29077 2009-11-30 03:44:42 9.0 11256 Corne De Brume \n", "05Harley 1307 2009-10-06 00:10:06 8.5 428 Der Weisse Bock \n", " 2732 2009-12-12 01:21:36 8.0 287 Bell's Consecrator Doppelbock \n", " 2899 2009-10-20 22:27:01 7.1 911 Andechser Doppelbock Dunkel \n", " 3054 2009-11-21 02:17:41 5.0 946 Piton Lager Beer \n", "\n", " beer_style \\\n", "profile_name beer_id time \n", "01121987 29077 2009-11-30 03:44:42 Scotch Ale / Wee Heavy \n", "05Harley 1307 2009-10-06 00:10:06 Weizenbock \n", " 2732 2009-12-12 01:21:36 Doppelbock \n", " 2899 2009-10-20 22:27:01 Doppelbock \n", " 3054 2009-11-21 02:17:41 American Adjunct Lager \n", "\n", " ... \\\n", "profile_name beer_id time ... \n", "01121987 29077 2009-11-30 03:44:42 ... \n", "05Harley 1307 2009-10-06 00:10:06 ... \n", " 2732 2009-12-12 01:21:36 ... \n", " 2899 2009-10-20 22:27:01 ... \n", " 3054 2009-11-21 02:17:41 ... \n", "\n", " review_overall review_palate review_taste \\\n", "profile_name beer_id time \n", "01121987 29077 2009-11-30 03:44:42 5 4 3.5 \n", "05Harley 1307 2009-10-06 00:10:06 4 4 4.0 \n", " 2732 2009-12-12 01:21:36 4 4 4.5 \n", " 2899 2009-10-20 22:27:01 5 4 5.0 \n", " 3054 2009-11-21 02:17:41 3 2 3.0 \n", "\n", " text \n", "profile_name beer_id time \n", "01121987 29077 2009-11-30 03:44:42 Poured into a belgian beer glass, not great h... \n", "05Harley 1307 2009-10-06 00:10:06 Can't find the date on this one.\\t\\tPurchased... \n", " 2732 2009-12-12 01:21:36 Bottle # 8881 (02/09)\\t\\tPurchased through We... \n", " 2899 2009-10-20 22:27:01 Bottle # 300310\\t\\tPurchased through Kracked ... \n", " 3054 2009-11-21 02:17:41 Bottled in 2007.\\t\\tPurchased in St. Lucia @ ... \n", "\n", "[5 rows x 10 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews = df.set_index(['profile_name', 'beer_id', 'time']).sort_index()\n", "reviews.head()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "# Why is this useful" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews.index.is_unique" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/plain": [ "drabmuh 242\n", "corby112 230\n", "BeerFMAndy 202\n", "northyorksammy 201\n", "mrmanning 187\n", "Name: profile_name, dtype: int64" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_reviewers = (reviews\n", " .index\n", " .get_level_values('profile_name')\n", " .value_counts()\n", " .head(5)\n", " )\n", "top_reviewers" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbrewer_idbeer_namebeer_style...review_overallreview_palatereview_tastetext
profile_namebeer_idtime
BeerFMAndy922009-12-24 21:51:467.2147Arrogant Bastard AleAmerican Strong Ale...4.54.04.022 oz bottle poured into a Sierra Nevada Impe...
1002009-10-22 03:39:215.7306Blue Moon Harvest Moon Pumpkin AlePumpkin Ale...2.02.52.012 oz bottle poured into an Imperial Pint. Ma...
....................................
northyorksammy549182009-12-21 14:56:2912.02097Kuhnhenn Bonicci Barley WineAmerican Barleywine...3.03.53.5Winner of the amateur brewing fest,Jerry Boni...
549192009-12-21 14:59:397.02097Kuhnhenn Foreign Export StoutForeign / Export Stout...3.54.03.5Thick stout, a study in chocolate. Very smoot...
\n", "

1062 rows × 10 columns

\n", "
" ], "text/plain": [ " abv brewer_id beer_name \\\n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 7.2 147 Arrogant Bastard Ale \n", " 100 2009-10-22 03:39:21 5.7 306 Blue Moon Harvest Moon Pumpkin Ale \n", "... ... ... ... \n", "northyorksammy 54918 2009-12-21 14:56:29 12.0 2097 Kuhnhenn Bonicci Barley Wine \n", " 54919 2009-12-21 14:59:39 7.0 2097 Kuhnhenn Foreign Export Stout \n", "\n", " beer_style \\\n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 American Strong Ale \n", " 100 2009-10-22 03:39:21 Pumpkin Ale \n", "... ... \n", "northyorksammy 54918 2009-12-21 14:56:29 American Barleywine \n", " 54919 2009-12-21 14:59:39 Foreign / Export Stout \n", "\n", " ... \\\n", "profile_name beer_id time ... \n", "BeerFMAndy 92 2009-12-24 21:51:46 ... \n", " 100 2009-10-22 03:39:21 ... \n", "... ... \n", "northyorksammy 54918 2009-12-21 14:56:29 ... \n", " 54919 2009-12-21 14:59:39 ... \n", "\n", " review_overall review_palate review_taste \\\n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 4.5 4.0 4.0 \n", " 100 2009-10-22 03:39:21 2.0 2.5 2.0 \n", "... ... ... ... \n", "northyorksammy 54918 2009-12-21 14:56:29 3.0 3.5 3.5 \n", " 54919 2009-12-21 14:59:39 3.5 4.0 3.5 \n", "\n", " text \n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 22 oz bottle poured into a Sierra Nevada Impe... \n", " 100 2009-10-22 03:39:21 12 oz bottle poured into an Imperial Pint. Ma... \n", "... ... \n", "northyorksammy 54918 2009-12-21 14:56:29 Winner of the amateur brewing fest,Jerry Boni... \n", " 54919 2009-12-21 14:59:39 Thick stout, a study in chocolate. Very smoot... \n", "\n", "[1062 rows x 10 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.display.max_rows=4\n", "reviews.loc[top_reviewers.index, :, :]" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
abvbrewer_idbeer_namebeer_style...review_overallreview_palatereview_tastetext
profile_namebeer_idtime
BeerFMAndy1002009-10-22 03:39:215.7306Blue Moon Harvest Moon Pumpkin AlePumpkin Ale...22.5212 oz bottle poured into an Imperial Pint. Ma...
\n", "

1 rows × 10 columns

\n", "
" ], "text/plain": [ " abv brewer_id beer_name \\\n", "profile_name beer_id time \n", "BeerFMAndy 100 2009-10-22 03:39:21 5.7 306 Blue Moon Harvest Moon Pumpkin Ale \n", "\n", " beer_style \\\n", "profile_name beer_id time \n", "BeerFMAndy 100 2009-10-22 03:39:21 Pumpkin Ale \n", "\n", " ... \\\n", "profile_name beer_id time ... \n", "BeerFMAndy 100 2009-10-22 03:39:21 ... \n", "\n", " review_overall review_palate review_taste \\\n", "profile_name beer_id time \n", "BeerFMAndy 100 2009-10-22 03:39:21 2 2.5 2 \n", "\n", " text \n", "profile_name beer_id time \n", "BeerFMAndy 100 2009-10-22 03:39:21 12 oz bottle poured into an Imperial Pint. Ma... \n", "\n", "[1 rows x 10 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews.loc[[('BeerFMAndy',100,pd.Timestamp('2009-10-22 03:39:21'))]]" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Specifying multi-axis indexers" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "idx = pd.IndexSlice" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_namebeer_style
profile_namebeer_idtime
BeerFMAndy922009-12-24 21:51:46Arrogant Bastard AleAmerican Strong Ale
northyorksammy549192009-12-21 14:59:39Kuhnhenn Foreign Export StoutForeign / Export Stout
\n", "
" ], "text/plain": [ " beer_name beer_style\n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 Arrogant Bastard Ale American Strong Ale\n", "northyorksammy 54919 2009-12-21 14:59:39 Kuhnhenn Foreign Export Stout Foreign / Export Stout" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews.loc[idx[top_reviewers.index, [92,54919], :], \n", " ['beer_name','beer_style']]" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_namebeer_style
profile_namebeer_idtime
BeerFMAndy922009-12-24 21:51:46Arrogant Bastard AleAmerican Strong Ale
northyorksammy549192009-12-21 14:59:39Kuhnhenn Foreign Export StoutForeign / Export Stout
\n", "
" ], "text/plain": [ " beer_name beer_style\n", "profile_name beer_id time \n", "BeerFMAndy 92 2009-12-24 21:51:46 Arrogant Bastard Ale American Strong Ale\n", "northyorksammy 54919 2009-12-21 14:59:39 Kuhnhenn Foreign Export Stout Foreign / Export Stout" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(reviews\n", " .query('profile_name in @top_reviewers.index and \\\n", " beer_id in [92,54919]')\n", " [['beer_name','beer_style']]\n", " )" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
beer_namebeer_style
profile_namebeer_idtime
Arnie2709922009-11-23 17:16:39Arrogant Bastard AleAmerican Strong Ale
BatsforBeer922009-11-06 04:10:13Arrogant Bastard AleAmerican Strong Ale
...............
wahhmaster922009-10-07 18:02:47Arrogant Bastard AleAmerican Strong Ale
woosterbill922009-11-12 03:00:54Arrogant Bastard AleAmerican Strong Ale
\n", "

72 rows × 2 columns

\n", "
" ], "text/plain": [ " beer_name beer_style\n", "profile_name beer_id time \n", "Arnie2709 92 2009-11-23 17:16:39 Arrogant Bastard Ale American Strong Ale\n", "BatsforBeer 92 2009-11-06 04:10:13 Arrogant Bastard Ale American Strong Ale\n", "... ... ...\n", "wahhmaster 92 2009-10-07 18:02:47 Arrogant Bastard Ale American Strong Ale\n", "woosterbill 92 2009-11-12 03:00:54 Arrogant Bastard Ale American Strong Ale\n", "\n", "[72 rows x 2 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reviews.loc[idx[:, [92, 54919], :], ['beer_name' ,'beer_style']]" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }