{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.20.3\n" ] } ], "source": [ "import pandas as pd\n", "import os\n", "\n", "print pd.__version__" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Removing Columns/Rows (Vid-6)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_lensepal_widpetal_lenpetal_widclass
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
\n", "
" ], "text/plain": [ " sepal_len sepal_wid petal_len petal_wid class\n", "0 5.1 3.5 1.4 0.2 Iris-setosa\n", "1 4.9 3.0 1.4 0.2 Iris-setosa\n", "2 4.7 3.2 1.3 0.2 Iris-setosa\n", "3 4.6 3.1 1.5 0.2 Iris-setosa\n", "4 5.0 3.6 1.4 0.2 Iris-setosa" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DATA_DIR = '../data'\n", "# reading table\n", "# making seperator as comma\n", "# renaming column names for 0th row of the file\n", "df = pd.read_table(\n", " os.path.abspath(os.path.join(DATA_DIR,'day1/iris.csv')), \n", " sep=',',\n", " header=0,\n", " names=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']\n", " )\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(150, 5)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# see dimension of the dataset\n", "# 150 rows, 5 columns\n", "df.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Column Drop" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_lensepal_widpetal_lenpetal_wid
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
\n", "
" ], "text/plain": [ " sepal_len sepal_wid petal_len petal_wid\n", "0 5.1 3.5 1.4 0.2\n", "1 4.9 3.0 1.4 0.2\n", "2 4.7 3.2 1.3 0.2\n", "3 4.6 3.1 1.5 0.2\n", "4 5.0 3.6 1.4 0.2" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# drop method takes the column names in array\n", "# axis=1 corresponds to columns\n", "# inplace=True does not require you to hold it in other variable, memory efficient\n", "df.drop(['class'], axis=1, inplace=True)\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Row Drop" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sepal_lensepal_widpetal_lenpetal_wid
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
\n", "
" ], "text/plain": [ " sepal_len sepal_wid petal_len petal_wid\n", "2 4.7 3.2 1.3 0.2\n", "3 4.6 3.1 1.5 0.2\n", "4 5.0 3.6 1.4 0.2\n", "5 5.4 3.9 1.7 0.4\n", "6 4.6 3.4 1.4 0.3" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# drop method takes the row names in array\n", "# axis=0 corresponds to rows, bydefault axis=0 in drop method\n", "# inplace=True does not require you to hold it in other variable, memory efficient\n", "df.drop([0, 1], axis=0, inplace=True)\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Takeaways\n", "\n", "1. Keep in practice to always specify 'axis' parameter in drop method or other necessary methods for better understanding." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# -----------------------" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sorting (Vid-7)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
09.3The Shawshank RedemptionRCrime142[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...
19.2The GodfatherRCrime175[u'Marlon Brando', u'Al Pacino', u'James Caan']
29.1The Godfather: Part IIRCrime200[u'Al Pacino', u'Robert De Niro', u'Robert Duv...
39.0The Dark KnightPG-13Action152[u'Christian Bale', u'Heath Ledger', u'Aaron E...
48.9Pulp FictionRCrime154[u'John Travolta', u'Uma Thurman', u'Samuel L....
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "0 9.3 The Shawshank Redemption R Crime 142 \n", "1 9.2 The Godfather R Crime 175 \n", "2 9.1 The Godfather: Part II R Crime 200 \n", "3 9.0 The Dark Knight PG-13 Action 152 \n", "4 8.9 Pulp Fiction R Crime 154 \n", "\n", " actors_list \n", "0 [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt... \n", "1 [u'Marlon Brando', u'Al Pacino', u'James Caan'] \n", "2 [u'Al Pacino', u'Robert De Niro', u'Robert Duv... \n", "3 [u'Christian Bale', u'Heath Ledger', u'Aaron E... \n", "4 [u'John Travolta', u'Uma Thurman', u'Samuel L.... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_table(\n", " 'http://bit.ly/imdbratings', \n", " sep=','\n", " )\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "978 7.4\n", "950 7.4\n", "949 7.4\n", "948 7.4\n", "947 7.4\n", "Name: star_rating, dtype: float64\n", "6 8.9\n", "3 9.0\n", "2 9.1\n", "1 9.2\n", "0 9.3\n", "Name: star_rating, dtype: float64\n" ] } ], "source": [ "# sort_values() method returns bydefault by ascending order\n", "# sort_values() can take 'inplace=True/False' for changing the values inplace\n", "print df['star_rating'].sort_values().head(5)\n", "print df['star_rating'].sort_values().tail(5)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 9.3\n", "1 9.2\n", "2 9.1\n", "3 9.0\n", "6 8.9\n", "Name: star_rating, dtype: float64\n", "947 7.4\n", "948 7.4\n", "949 7.4\n", "950 7.4\n", "978 7.4\n", "Name: star_rating, dtype: float64\n" ] } ], "source": [ "# ascending=True/False parameter in sort_values() can decide the sorting order\n", "print df['star_rating'].sort_values(ascending=False).head(5)\n", "print df['star_rating'].sort_values(ascending=False).tail(5)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
3898.0FreaksUNRATEDDrama64[u'Wallace Ford', u'Leila Hyams', u'Olga Bacla...
3388.0Battleship PotemkinUNRATEDHistory66[u'Aleksandr Antonov', u'Vladimir Barsky', u'G...
2588.1The Cabinet of Dr. CaligariUNRATEDCrime67[u'Werner Krauss', u'Conrad Veidt', u'Friedric...
2938.1Duck SoupPASSEDComedy68[u'Groucho Marx', u'Harpo Marx', u'Chico Marx']
888.4The KidNOT RATEDComedy68[u'Charles Chaplin', u'Edna Purviance', u'Jack...
\n", "
" ], "text/plain": [ " star_rating title content_rating genre \\\n", "389 8.0 Freaks UNRATED Drama \n", "338 8.0 Battleship Potemkin UNRATED History \n", "258 8.1 The Cabinet of Dr. Caligari UNRATED Crime \n", "293 8.1 Duck Soup PASSED Comedy \n", "88 8.4 The Kid NOT RATED Comedy \n", "\n", " duration actors_list \n", "389 64 [u'Wallace Ford', u'Leila Hyams', u'Olga Bacla... \n", "338 66 [u'Aleksandr Antonov', u'Vladimir Barsky', u'G... \n", "258 67 [u'Werner Krauss', u'Conrad Veidt', u'Friedric... \n", "293 68 [u'Groucho Marx', u'Harpo Marx', u'Chico Marx'] \n", "88 68 [u'Charles Chaplin', u'Edna Purviance', u'Jack... " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# relatively better way to do is to use the below mentioned technique\n", "# to sort by multiple fields, just populate the array inside sort_values()\n", "df.sort_values(['duration'], ascending=True).head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Takeaways\n", "\n", "1. Pandas dataframe is table having rows and columns.\n", "2. Pandas Series is just one column in the dataframe.\n", "3. sort_values() method returns bydefault by ascending order." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# -----------------------\n", "\n", "# Single Filter (Vid-8)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
09.3The Shawshank RedemptionRCrime142[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...
19.2The GodfatherRCrime175[u'Marlon Brando', u'Al Pacino', u'James Caan']
29.1The Godfather: Part IIRCrime200[u'Al Pacino', u'Robert De Niro', u'Robert Duv...
39.0The Dark KnightPG-13Action152[u'Christian Bale', u'Heath Ledger', u'Aaron E...
48.9Pulp FictionRCrime154[u'John Travolta', u'Uma Thurman', u'Samuel L....
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "0 9.3 The Shawshank Redemption R Crime 142 \n", "1 9.2 The Godfather R Crime 175 \n", "2 9.1 The Godfather: Part II R Crime 200 \n", "3 9.0 The Dark Knight PG-13 Action 152 \n", "4 8.9 Pulp Fiction R Crime 154 \n", "\n", " actors_list \n", "0 [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt... \n", "1 [u'Marlon Brando', u'Al Pacino', u'James Caan'] \n", "2 [u'Al Pacino', u'Robert De Niro', u'Robert Duv... \n", "3 [u'Christian Bale', u'Heath Ledger', u'Aaron E... \n", "4 [u'John Travolta', u'Uma Thurman', u'Samuel L.... " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_table(\n", " 'http://bit.ly/imdbratings', \n", " sep=','\n", " )\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 1" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
358.6Modern TimesGComedy87[u'Charles Chaplin', u'Paulette Goddard', u'He...
368.6Saving Private RyanRAction169[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']
378.6Raiders of the Lost ArkPGAction115[u'Harrison Ford', u'Karen Allen', u'Paul Free...
388.6Rear WindowAPPROVEDMystery112[u'James Stewart', u'Grace Kelly', u'Wendell C...
398.6PsychoRHorror109[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "35 8.6 Modern Times G Comedy 87 \n", "36 8.6 Saving Private Ryan R Action 169 \n", "37 8.6 Raiders of the Lost Ark PG Action 115 \n", "38 8.6 Rear Window APPROVED Mystery 112 \n", "39 8.6 Psycho R Horror 109 \n", "\n", " actors_list \n", "35 [u'Charles Chaplin', u'Paulette Goddard', u'He... \n", "36 [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore'] \n", "37 [u'Harrison Ford', u'Karen Allen', u'Paul Free... \n", "38 [u'James Stewart', u'Grace Kelly', u'Wendell C... \n", "39 [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi... " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# we need movies above 8.5\n", "df_rating_bools = df['star_rating'].map(lambda row: row>8.5)\n", "df[df_rating_bools].tail(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 2" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
358.6Modern TimesGComedy87[u'Charles Chaplin', u'Paulette Goddard', u'He...
368.6Saving Private RyanRAction169[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']
378.6Raiders of the Lost ArkPGAction115[u'Harrison Ford', u'Karen Allen', u'Paul Free...
388.6Rear WindowAPPROVEDMystery112[u'James Stewart', u'Grace Kelly', u'Wendell C...
398.6PsychoRHorror109[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "35 8.6 Modern Times G Comedy 87 \n", "36 8.6 Saving Private Ryan R Action 169 \n", "37 8.6 Raiders of the Lost Ark PG Action 115 \n", "38 8.6 Rear Window APPROVED Mystery 112 \n", "39 8.6 Psycho R Horror 109 \n", "\n", " actors_list \n", "35 [u'Charles Chaplin', u'Paulette Goddard', u'He... \n", "36 [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore'] \n", "37 [u'Harrison Ford', u'Karen Allen', u'Paul Free... \n", "38 [u'James Stewart', u'Grace Kelly', u'Wendell C... \n", "39 [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi... " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# we need movies above 8.5\n", "boolean = list()\n", "for row in df['star_rating']:\n", " if row > 8.5: boolean.append(True)\n", " else: boolean.append(False)\n", "\n", "# boolean is a list, and since column in pandas is a series, so we need to convert list to series\n", "df_rating_bools = pd.Series(boolean)\n", "df[df_rating_bools].tail(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 3" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
358.6Modern TimesGComedy87[u'Charles Chaplin', u'Paulette Goddard', u'He...
368.6Saving Private RyanRAction169[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']
378.6Raiders of the Lost ArkPGAction115[u'Harrison Ford', u'Karen Allen', u'Paul Free...
388.6Rear WindowAPPROVEDMystery112[u'James Stewart', u'Grace Kelly', u'Wendell C...
398.6PsychoRHorror109[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "35 8.6 Modern Times G Comedy 87 \n", "36 8.6 Saving Private Ryan R Action 169 \n", "37 8.6 Raiders of the Lost Ark PG Action 115 \n", "38 8.6 Rear Window APPROVED Mystery 112 \n", "39 8.6 Psycho R Horror 109 \n", "\n", " actors_list \n", "35 [u'Charles Chaplin', u'Paulette Goddard', u'He... \n", "36 [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore'] \n", "37 [u'Harrison Ford', u'Karen Allen', u'Paul Free... \n", "38 [u'James Stewart', u'Grace Kelly', u'Wendell C... \n", "39 [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi... " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# df['star_rating'] > 8.5 automatically searches/iterates through all the rows satisying this condition\n", "df[df['star_rating'] > 8.5].tail(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Takeaways\n", "\n", "1. Try practicing, Experiment 3 while coding." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# -----------------------\n", "\n", "# Multiple Filter (Vid-9)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
09.3The Shawshank RedemptionRCrime142[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...
19.2The GodfatherRCrime175[u'Marlon Brando', u'Al Pacino', u'James Caan']
29.1The Godfather: Part IIRCrime200[u'Al Pacino', u'Robert De Niro', u'Robert Duv...
39.0The Dark KnightPG-13Action152[u'Christian Bale', u'Heath Ledger', u'Aaron E...
48.9Pulp FictionRCrime154[u'John Travolta', u'Uma Thurman', u'Samuel L....
\n", "
" ], "text/plain": [ " star_rating title content_rating genre duration \\\n", "0 9.3 The Shawshank Redemption R Crime 142 \n", "1 9.2 The Godfather R Crime 175 \n", "2 9.1 The Godfather: Part II R Crime 200 \n", "3 9.0 The Dark Knight PG-13 Action 152 \n", "4 8.9 Pulp Fiction R Crime 154 \n", "\n", " actors_list \n", "0 [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt... \n", "1 [u'Marlon Brando', u'Al Pacino', u'James Caan'] \n", "2 [u'Al Pacino', u'Robert De Niro', u'Robert Duv... \n", "3 [u'Christian Bale', u'Heath Ledger', u'Aaron E... \n", "4 [u'John Travolta', u'Uma Thurman', u'Samuel L.... " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_table(\n", " 'http://bit.ly/imdbratings', \n", " sep=','\n", " )\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 1" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
78.9The Lord of the Rings: The Return of the KingPG-13Adventure201[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...
178.7Seven SamuraiUNRATEDDrama207[u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...
\n", "
" ], "text/plain": [ " star_rating title content_rating \\\n", "7 8.9 The Lord of the Rings: The Return of the King PG-13 \n", "17 8.7 Seven Samurai UNRATED \n", "\n", " genre duration actors_list \n", "7 Adventure 201 [u'Elijah Wood', u'Viggo Mortensen', u'Ian McK... \n", "17 Drama 207 [u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K... " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# we want movies that have rating above 8.5 and duration above 200mins\n", "df[(df['star_rating'] > 8.5) & (df['duration'] > 200)].head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 2" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
78.9The Lord of the Rings: The Return of the KingPG-13Adventure201[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...
178.7Seven SamuraiUNRATEDDrama207[u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...
\n", "
" ], "text/plain": [ " star_rating title content_rating \\\n", "7 8.9 The Lord of the Rings: The Return of the King PG-13 \n", "17 8.7 Seven Samurai UNRATED \n", "\n", " genre duration actors_list \n", "7 Adventure 201 [u'Elijah Wood', u'Viggo Mortensen', u'Ian McK... \n", "17 Drama 207 [u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K... " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_rating_bools = df['star_rating'].map(lambda row: row>8.5)\n", "df_duration_bools = df['duration'].map(lambda row: row>200)\n", "df[df_rating_bools & df_duration_bools].tail(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment 3" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
star_ratingtitlecontent_ratinggenredurationactors_list
39.0The Dark KnightPG-13Action152[u'Christian Bale', u'Heath Ledger', u'Aaron E...
58.912 Angry MenNOT RATEDDrama96[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...
98.9Fight ClubRDrama139[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...
118.8InceptionPG-13Action148[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...
128.8Star Wars: Episode V - The Empire Strikes BackPGAction124[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi...
\n", "
" ], "text/plain": [ " star_rating title \\\n", "3 9.0 The Dark Knight \n", "5 8.9 12 Angry Men \n", "9 8.9 Fight Club \n", "11 8.8 Inception \n", "12 8.8 Star Wars: Episode V - The Empire Strikes Back \n", "\n", " content_rating genre duration \\\n", "3 PG-13 Action 152 \n", "5 NOT RATED Drama 96 \n", "9 R Drama 139 \n", "11 PG-13 Action 148 \n", "12 PG Action 124 \n", "\n", " actors_list \n", "3 [u'Christian Bale', u'Heath Ledger', u'Aaron E... \n", "5 [u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals... \n", "9 [u'Brad Pitt', u'Edward Norton', u'Helena Bonh... \n", "11 [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'... \n", "12 [u'Mark Hamill', u'Harrison Ford', u'Carrie Fi... " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this appoarch is inspired by python 'if in [1,2]' functionality\n", "bools = df['genre'].isin(['Drama', 'Action'])\n", "df[bools].head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Takeaways\n", "\n", "1. Use & when putting and filter\n", "2. Use | when putting or filter\n", "3. Remember to put parenthesis as shown in [Vid-9 Experiment 1], it helps pandas to set priority to the evaluations\n", "4. When in situation to use multiple | conditions, try using Vid-9 Experiement 3" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }