{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.20.3\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "\n",
    "print pd.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Removing Columns/Rows (Vid-6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_len</th>\n",
       "      <th>sepal_wid</th>\n",
       "      <th>petal_len</th>\n",
       "      <th>petal_wid</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "      <td>Iris-setosa</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_len  sepal_wid  petal_len  petal_wid        class\n",
       "0        5.1        3.5        1.4        0.2  Iris-setosa\n",
       "1        4.9        3.0        1.4        0.2  Iris-setosa\n",
       "2        4.7        3.2        1.3        0.2  Iris-setosa\n",
       "3        4.6        3.1        1.5        0.2  Iris-setosa\n",
       "4        5.0        3.6        1.4        0.2  Iris-setosa"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DATA_DIR = '../data'\n",
    "# reading table\n",
    "# making seperator as comma\n",
    "# renaming column names for 0th row of the file\n",
    "df = pd.read_table(\n",
    "                   os.path.abspath(os.path.join(DATA_DIR,'day1/iris.csv')), \n",
    "                   sep=',',\n",
    "                   header=0,\n",
    "                   names=['sepal_len', 'sepal_wid', 'petal_len', 'petal_wid', 'class']\n",
    "                  )\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(150, 5)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# see dimension of the dataset\n",
    "# 150 rows, 5 columns\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Column Drop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_len</th>\n",
       "      <th>sepal_wid</th>\n",
       "      <th>petal_len</th>\n",
       "      <th>petal_wid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5.1</td>\n",
       "      <td>3.5</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.9</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_len  sepal_wid  petal_len  petal_wid\n",
       "0        5.1        3.5        1.4        0.2\n",
       "1        4.9        3.0        1.4        0.2\n",
       "2        4.7        3.2        1.3        0.2\n",
       "3        4.6        3.1        1.5        0.2\n",
       "4        5.0        3.6        1.4        0.2"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop method takes the column names in array\n",
    "# axis=1 corresponds to columns\n",
    "# inplace=True does not require you to hold it in other variable, memory efficient\n",
    "df.drop(['class'], axis=1, inplace=True)\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Row Drop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>sepal_len</th>\n",
       "      <th>sepal_wid</th>\n",
       "      <th>petal_len</th>\n",
       "      <th>petal_wid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4.7</td>\n",
       "      <td>3.2</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.1</td>\n",
       "      <td>1.5</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5.0</td>\n",
       "      <td>3.6</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5.4</td>\n",
       "      <td>3.9</td>\n",
       "      <td>1.7</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>4.6</td>\n",
       "      <td>3.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>0.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   sepal_len  sepal_wid  petal_len  petal_wid\n",
       "2        4.7        3.2        1.3        0.2\n",
       "3        4.6        3.1        1.5        0.2\n",
       "4        5.0        3.6        1.4        0.2\n",
       "5        5.4        3.9        1.7        0.4\n",
       "6        4.6        3.4        1.4        0.3"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop method takes the row names in array\n",
    "# axis=0 corresponds to rows, bydefault axis=0 in drop method\n",
    "# inplace=True does not require you to hold it in other variable, memory efficient\n",
    "df.drop([0, 1], axis=0, inplace=True)\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Takeaways\n",
    "\n",
    "1. Keep in practice to always specify 'axis' parameter in drop method or other necessary methods for better understanding."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# -----------------------"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sorting (Vid-7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                     title content_rating   genre  duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_table(\n",
    "                   'http://bit.ly/imdbratings', \n",
    "                   sep=','\n",
    "                  )\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "978    7.4\n",
      "950    7.4\n",
      "949    7.4\n",
      "948    7.4\n",
      "947    7.4\n",
      "Name: star_rating, dtype: float64\n",
      "6    8.9\n",
      "3    9.0\n",
      "2    9.1\n",
      "1    9.2\n",
      "0    9.3\n",
      "Name: star_rating, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# sort_values() method returns bydefault by ascending order\n",
    "# sort_values() can take 'inplace=True/False' for changing the values inplace\n",
    "print df['star_rating'].sort_values().head(5)\n",
    "print df['star_rating'].sort_values().tail(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    9.3\n",
      "1    9.2\n",
      "2    9.1\n",
      "3    9.0\n",
      "6    8.9\n",
      "Name: star_rating, dtype: float64\n",
      "947    7.4\n",
      "948    7.4\n",
      "949    7.4\n",
      "950    7.4\n",
      "978    7.4\n",
      "Name: star_rating, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# ascending=True/False parameter in sort_values() can decide the sorting order\n",
    "print df['star_rating'].sort_values(ascending=False).head(5)\n",
    "print df['star_rating'].sort_values(ascending=False).tail(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>389</th>\n",
       "      <td>8.0</td>\n",
       "      <td>Freaks</td>\n",
       "      <td>UNRATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>64</td>\n",
       "      <td>[u'Wallace Ford', u'Leila Hyams', u'Olga Bacla...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>338</th>\n",
       "      <td>8.0</td>\n",
       "      <td>Battleship Potemkin</td>\n",
       "      <td>UNRATED</td>\n",
       "      <td>History</td>\n",
       "      <td>66</td>\n",
       "      <td>[u'Aleksandr Antonov', u'Vladimir Barsky', u'G...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258</th>\n",
       "      <td>8.1</td>\n",
       "      <td>The Cabinet of Dr. Caligari</td>\n",
       "      <td>UNRATED</td>\n",
       "      <td>Crime</td>\n",
       "      <td>67</td>\n",
       "      <td>[u'Werner Krauss', u'Conrad Veidt', u'Friedric...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>8.1</td>\n",
       "      <td>Duck Soup</td>\n",
       "      <td>PASSED</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>68</td>\n",
       "      <td>[u'Groucho Marx', u'Harpo Marx', u'Chico Marx']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>8.4</td>\n",
       "      <td>The Kid</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>68</td>\n",
       "      <td>[u'Charles Chaplin', u'Edna Purviance', u'Jack...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     star_rating                        title content_rating    genre  \\\n",
       "389          8.0                       Freaks        UNRATED    Drama   \n",
       "338          8.0          Battleship Potemkin        UNRATED  History   \n",
       "258          8.1  The Cabinet of Dr. Caligari        UNRATED    Crime   \n",
       "293          8.1                    Duck Soup         PASSED   Comedy   \n",
       "88           8.4                      The Kid      NOT RATED   Comedy   \n",
       "\n",
       "     duration                                        actors_list  \n",
       "389        64  [u'Wallace Ford', u'Leila Hyams', u'Olga Bacla...  \n",
       "338        66  [u'Aleksandr Antonov', u'Vladimir Barsky', u'G...  \n",
       "258        67  [u'Werner Krauss', u'Conrad Veidt', u'Friedric...  \n",
       "293        68    [u'Groucho Marx', u'Harpo Marx', u'Chico Marx']  \n",
       "88         68  [u'Charles Chaplin', u'Edna Purviance', u'Jack...  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# relatively better way to do is to use the below mentioned technique\n",
    "# to sort by multiple fields, just populate the array inside sort_values()\n",
    "df.sort_values(['duration'], ascending=True).head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Takeaways\n",
    "\n",
    "1. Pandas dataframe is table having rows and columns.\n",
    "2. Pandas Series is just one column in the dataframe.\n",
    "3. sort_values() method returns bydefault by ascending order."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# -----------------------\n",
    "\n",
    "# Single Filter (Vid-8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                     title content_rating   genre  duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_table(\n",
    "                   'http://bit.ly/imdbratings', \n",
    "                   sep=','\n",
    "                  )\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Modern Times</td>\n",
       "      <td>G</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>87</td>\n",
       "      <td>[u'Charles Chaplin', u'Paulette Goddard', u'He...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Saving Private Ryan</td>\n",
       "      <td>R</td>\n",
       "      <td>Action</td>\n",
       "      <td>169</td>\n",
       "      <td>[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Raiders of the Lost Ark</td>\n",
       "      <td>PG</td>\n",
       "      <td>Action</td>\n",
       "      <td>115</td>\n",
       "      <td>[u'Harrison Ford', u'Karen Allen', u'Paul Free...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Rear Window</td>\n",
       "      <td>APPROVED</td>\n",
       "      <td>Mystery</td>\n",
       "      <td>112</td>\n",
       "      <td>[u'James Stewart', u'Grace Kelly', u'Wendell C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Psycho</td>\n",
       "      <td>R</td>\n",
       "      <td>Horror</td>\n",
       "      <td>109</td>\n",
       "      <td>[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                    title content_rating    genre  duration  \\\n",
       "35          8.6             Modern Times              G   Comedy        87   \n",
       "36          8.6      Saving Private Ryan              R   Action       169   \n",
       "37          8.6  Raiders of the Lost Ark             PG   Action       115   \n",
       "38          8.6              Rear Window       APPROVED  Mystery       112   \n",
       "39          8.6                   Psycho              R   Horror       109   \n",
       "\n",
       "                                          actors_list  \n",
       "35  [u'Charles Chaplin', u'Paulette Goddard', u'He...  \n",
       "36     [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']  \n",
       "37  [u'Harrison Ford', u'Karen Allen', u'Paul Free...  \n",
       "38  [u'James Stewart', u'Grace Kelly', u'Wendell C...  \n",
       "39  [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we need movies above 8.5\n",
    "df_rating_bools = df['star_rating'].map(lambda row: row>8.5)\n",
    "df[df_rating_bools].tail(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Modern Times</td>\n",
       "      <td>G</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>87</td>\n",
       "      <td>[u'Charles Chaplin', u'Paulette Goddard', u'He...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Saving Private Ryan</td>\n",
       "      <td>R</td>\n",
       "      <td>Action</td>\n",
       "      <td>169</td>\n",
       "      <td>[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Raiders of the Lost Ark</td>\n",
       "      <td>PG</td>\n",
       "      <td>Action</td>\n",
       "      <td>115</td>\n",
       "      <td>[u'Harrison Ford', u'Karen Allen', u'Paul Free...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Rear Window</td>\n",
       "      <td>APPROVED</td>\n",
       "      <td>Mystery</td>\n",
       "      <td>112</td>\n",
       "      <td>[u'James Stewart', u'Grace Kelly', u'Wendell C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Psycho</td>\n",
       "      <td>R</td>\n",
       "      <td>Horror</td>\n",
       "      <td>109</td>\n",
       "      <td>[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                    title content_rating    genre  duration  \\\n",
       "35          8.6             Modern Times              G   Comedy        87   \n",
       "36          8.6      Saving Private Ryan              R   Action       169   \n",
       "37          8.6  Raiders of the Lost Ark             PG   Action       115   \n",
       "38          8.6              Rear Window       APPROVED  Mystery       112   \n",
       "39          8.6                   Psycho              R   Horror       109   \n",
       "\n",
       "                                          actors_list  \n",
       "35  [u'Charles Chaplin', u'Paulette Goddard', u'He...  \n",
       "36     [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']  \n",
       "37  [u'Harrison Ford', u'Karen Allen', u'Paul Free...  \n",
       "38  [u'James Stewart', u'Grace Kelly', u'Wendell C...  \n",
       "39  [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we need movies above 8.5\n",
    "boolean = list()\n",
    "for row in df['star_rating']:\n",
    "    if row > 8.5: boolean.append(True)\n",
    "    else: boolean.append(False)\n",
    "\n",
    "# boolean is a list, and since column in pandas is a series, so we need to convert list to series\n",
    "df_rating_bools = pd.Series(boolean)\n",
    "df[df_rating_bools].tail(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Modern Times</td>\n",
       "      <td>G</td>\n",
       "      <td>Comedy</td>\n",
       "      <td>87</td>\n",
       "      <td>[u'Charles Chaplin', u'Paulette Goddard', u'He...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Saving Private Ryan</td>\n",
       "      <td>R</td>\n",
       "      <td>Action</td>\n",
       "      <td>169</td>\n",
       "      <td>[u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Raiders of the Lost Ark</td>\n",
       "      <td>PG</td>\n",
       "      <td>Action</td>\n",
       "      <td>115</td>\n",
       "      <td>[u'Harrison Ford', u'Karen Allen', u'Paul Free...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Rear Window</td>\n",
       "      <td>APPROVED</td>\n",
       "      <td>Mystery</td>\n",
       "      <td>112</td>\n",
       "      <td>[u'James Stewart', u'Grace Kelly', u'Wendell C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>8.6</td>\n",
       "      <td>Psycho</td>\n",
       "      <td>R</td>\n",
       "      <td>Horror</td>\n",
       "      <td>109</td>\n",
       "      <td>[u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                    title content_rating    genre  duration  \\\n",
       "35          8.6             Modern Times              G   Comedy        87   \n",
       "36          8.6      Saving Private Ryan              R   Action       169   \n",
       "37          8.6  Raiders of the Lost Ark             PG   Action       115   \n",
       "38          8.6              Rear Window       APPROVED  Mystery       112   \n",
       "39          8.6                   Psycho              R   Horror       109   \n",
       "\n",
       "                                          actors_list  \n",
       "35  [u'Charles Chaplin', u'Paulette Goddard', u'He...  \n",
       "36     [u'Tom Hanks', u'Matt Damon', u'Tom Sizemore']  \n",
       "37  [u'Harrison Ford', u'Karen Allen', u'Paul Free...  \n",
       "38  [u'James Stewart', u'Grace Kelly', u'Wendell C...  \n",
       "39  [u'Anthony Perkins', u'Janet Leigh', u'Vera Mi...  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df['star_rating'] > 8.5 automatically searches/iterates through all the rows satisying this condition\n",
    "df[df['star_rating'] > 8.5].tail(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Takeaways\n",
    "\n",
    "1. Try practicing, Experiment 3 while coding."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# -----------------------\n",
    "\n",
    "# Multiple Filter (Vid-9)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9.3</td>\n",
       "      <td>The Shawshank Redemption</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>142</td>\n",
       "      <td>[u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9.2</td>\n",
       "      <td>The Godfather</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>175</td>\n",
       "      <td>[u'Marlon Brando', u'Al Pacino', u'James Caan']</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9.1</td>\n",
       "      <td>The Godfather: Part II</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>200</td>\n",
       "      <td>[u'Al Pacino', u'Robert De Niro', u'Robert Duv...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Pulp Fiction</td>\n",
       "      <td>R</td>\n",
       "      <td>Crime</td>\n",
       "      <td>154</td>\n",
       "      <td>[u'John Travolta', u'Uma Thurman', u'Samuel L....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   star_rating                     title content_rating   genre  duration  \\\n",
       "0          9.3  The Shawshank Redemption              R   Crime       142   \n",
       "1          9.2             The Godfather              R   Crime       175   \n",
       "2          9.1    The Godfather: Part II              R   Crime       200   \n",
       "3          9.0           The Dark Knight          PG-13  Action       152   \n",
       "4          8.9              Pulp Fiction              R   Crime       154   \n",
       "\n",
       "                                         actors_list  \n",
       "0  [u'Tim Robbins', u'Morgan Freeman', u'Bob Gunt...  \n",
       "1    [u'Marlon Brando', u'Al Pacino', u'James Caan']  \n",
       "2  [u'Al Pacino', u'Robert De Niro', u'Robert Duv...  \n",
       "3  [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "4  [u'John Travolta', u'Uma Thurman', u'Samuel L....  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_table(\n",
    "                   'http://bit.ly/imdbratings', \n",
    "                   sep=','\n",
    "                  )\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8.9</td>\n",
       "      <td>The Lord of the Rings: The Return of the King</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Adventure</td>\n",
       "      <td>201</td>\n",
       "      <td>[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>8.7</td>\n",
       "      <td>Seven Samurai</td>\n",
       "      <td>UNRATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>207</td>\n",
       "      <td>[u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                                          title content_rating  \\\n",
       "7           8.9  The Lord of the Rings: The Return of the King          PG-13   \n",
       "17          8.7                                  Seven Samurai        UNRATED   \n",
       "\n",
       "        genre  duration                                        actors_list  \n",
       "7   Adventure       201  [u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...  \n",
       "17      Drama       207  [u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we want movies that have rating above  8.5 and duration above 200mins\n",
    "df[(df['star_rating'] > 8.5) & (df['duration'] > 200)].head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8.9</td>\n",
       "      <td>The Lord of the Rings: The Return of the King</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Adventure</td>\n",
       "      <td>201</td>\n",
       "      <td>[u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>8.7</td>\n",
       "      <td>Seven Samurai</td>\n",
       "      <td>UNRATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>207</td>\n",
       "      <td>[u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                                          title content_rating  \\\n",
       "7           8.9  The Lord of the Rings: The Return of the King          PG-13   \n",
       "17          8.7                                  Seven Samurai        UNRATED   \n",
       "\n",
       "        genre  duration                                        actors_list  \n",
       "7   Adventure       201  [u'Elijah Wood', u'Viggo Mortensen', u'Ian McK...  \n",
       "17      Drama       207  [u'Toshir\\xf4 Mifune', u'Takashi Shimura', u'K...  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating_bools = df['star_rating'].map(lambda row: row>8.5)\n",
    "df_duration_bools = df['duration'].map(lambda row: row>200)\n",
    "df[df_rating_bools & df_duration_bools].tail(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experiment 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>star_rating</th>\n",
       "      <th>title</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>genre</th>\n",
       "      <th>duration</th>\n",
       "      <th>actors_list</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9.0</td>\n",
       "      <td>The Dark Knight</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>152</td>\n",
       "      <td>[u'Christian Bale', u'Heath Ledger', u'Aaron E...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8.9</td>\n",
       "      <td>12 Angry Men</td>\n",
       "      <td>NOT RATED</td>\n",
       "      <td>Drama</td>\n",
       "      <td>96</td>\n",
       "      <td>[u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>8.9</td>\n",
       "      <td>Fight Club</td>\n",
       "      <td>R</td>\n",
       "      <td>Drama</td>\n",
       "      <td>139</td>\n",
       "      <td>[u'Brad Pitt', u'Edward Norton', u'Helena Bonh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Inception</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>Action</td>\n",
       "      <td>148</td>\n",
       "      <td>[u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>8.8</td>\n",
       "      <td>Star Wars: Episode V - The Empire Strikes Back</td>\n",
       "      <td>PG</td>\n",
       "      <td>Action</td>\n",
       "      <td>124</td>\n",
       "      <td>[u'Mark Hamill', u'Harrison Ford', u'Carrie Fi...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    star_rating                                           title  \\\n",
       "3           9.0                                 The Dark Knight   \n",
       "5           8.9                                    12 Angry Men   \n",
       "9           8.9                                      Fight Club   \n",
       "11          8.8                                       Inception   \n",
       "12          8.8  Star Wars: Episode V - The Empire Strikes Back   \n",
       "\n",
       "   content_rating   genre  duration  \\\n",
       "3           PG-13  Action       152   \n",
       "5       NOT RATED   Drama        96   \n",
       "9               R   Drama       139   \n",
       "11          PG-13  Action       148   \n",
       "12             PG  Action       124   \n",
       "\n",
       "                                          actors_list  \n",
       "3   [u'Christian Bale', u'Heath Ledger', u'Aaron E...  \n",
       "5   [u'Henry Fonda', u'Lee J. Cobb', u'Martin Bals...  \n",
       "9   [u'Brad Pitt', u'Edward Norton', u'Helena Bonh...  \n",
       "11  [u'Leonardo DiCaprio', u'Joseph Gordon-Levitt'...  \n",
       "12  [u'Mark Hamill', u'Harrison Ford', u'Carrie Fi...  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this appoarch is inspired by python 'if in [1,2]' functionality\n",
    "bools = df['genre'].isin(['Drama', 'Action'])\n",
    "df[bools].head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Takeaways\n",
    "\n",
    "1. Use & when putting and filter\n",
    "2. Use | when putting or filter\n",
    "3. Remember to put parenthesis as shown in [Vid-9 Experiment 1], it helps pandas to set priority to the evaluations\n",
    "4. When in situation to use multiple | conditions, try using Vid-9 Experiement 3"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}