{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.20.3\n",
      "2.2.2\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import matplotlib\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "print pd.__version__\n",
    "print matplotlib.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Missing Values in Pandas (Vid-16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Colors Reported</th>\n",
       "      <th>Shape Reported</th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ithaca</td>\n",
       "      <td>NaN</td>\n",
       "      <td>TRIANGLE</td>\n",
       "      <td>NY</td>\n",
       "      <td>6/1/1930 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Willingboro</td>\n",
       "      <td>NaN</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>NJ</td>\n",
       "      <td>6/30/1930 20:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Holyoke</td>\n",
       "      <td>NaN</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>CO</td>\n",
       "      <td>2/15/1931 14:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Abilene</td>\n",
       "      <td>NaN</td>\n",
       "      <td>DISK</td>\n",
       "      <td>KS</td>\n",
       "      <td>6/1/1931 13:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>New York Worlds Fair</td>\n",
       "      <td>NaN</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>NY</td>\n",
       "      <td>4/18/1933 19:00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   City Colors Reported Shape Reported State             Time\n",
       "0                Ithaca             NaN       TRIANGLE    NY   6/1/1930 22:00\n",
       "1           Willingboro             NaN          OTHER    NJ  6/30/1930 20:00\n",
       "2               Holyoke             NaN           OVAL    CO  2/15/1931 14:00\n",
       "3               Abilene             NaN           DISK    KS   6/1/1931 13:00\n",
       "4  New York Worlds Fair             NaN          LIGHT    NY  4/18/1933 19:00"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# reading table\n",
    "# making seperator as comma\n",
    "df = pd.read_table(\n",
    "                   'http://bit.ly/uforeports', \n",
    "                   sep=','\n",
    "                  )\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Colors Reported</th>\n",
       "      <th>Shape Reported</th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    City  Colors Reported  Shape Reported  State   Time\n",
       "0  False             True           False  False  False\n",
       "1  False             True           False  False  False\n",
       "2  False             True           False  False  False\n",
       "3  False             True           False  False  False\n",
       "4  False             True           False  False  False"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# isnull() method returns a df of T/F considering if NaN occurs in a particular cell\n",
    "df.isnull().head()\n",
    "# notnull() is just inverse of isnull()\n",
    "# df.notnull().head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "City                  25\n",
       "Colors Reported    15359\n",
       "Shape Reported      2644\n",
       "State                  0\n",
       "Time                   0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pandas converts T to 1 and F to 0\n",
    "# since isnull() returns T to whereever NaN exists\n",
    "# .sum() over it gives the total number of NaN in the dataframe\n",
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Colors Reported</th>\n",
       "      <th>Shape Reported</th>\n",
       "      <th>State</th>\n",
       "      <th>Time</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Belton</td>\n",
       "      <td>RED</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>SC</td>\n",
       "      <td>6/30/1939 20:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Bering Sea</td>\n",
       "      <td>RED</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>AK</td>\n",
       "      <td>4/30/1943 23:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Portsmouth</td>\n",
       "      <td>RED</td>\n",
       "      <td>FORMATION</td>\n",
       "      <td>VA</td>\n",
       "      <td>7/10/1945 1:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Blairsden</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>CA</td>\n",
       "      <td>6/30/1946 19:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>82</th>\n",
       "      <td>San Jose</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>CHEVRON</td>\n",
       "      <td>CA</td>\n",
       "      <td>7/15/1947 21:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>84</th>\n",
       "      <td>Modesto</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>DISK</td>\n",
       "      <td>CA</td>\n",
       "      <td>8/8/1947 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>Scipio</td>\n",
       "      <td>RED</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>IN</td>\n",
       "      <td>5/10/1948 19:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>111</th>\n",
       "      <td>Tarrant City</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>AL</td>\n",
       "      <td>8/15/1949 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>Napa</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>DISK</td>\n",
       "      <td>CA</td>\n",
       "      <td>6/10/1950 0:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>Coeur d'Alene</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIGAR</td>\n",
       "      <td>ID</td>\n",
       "      <td>7/2/1950 13:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>Irving</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>DISK</td>\n",
       "      <td>KS</td>\n",
       "      <td>4/15/1951 0:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>157</th>\n",
       "      <td>Greenville</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>DISK</td>\n",
       "      <td>MS</td>\n",
       "      <td>6/15/1951 20:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>163</th>\n",
       "      <td>Green River</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>WY</td>\n",
       "      <td>7/3/1951 12:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>Provo</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>DISK</td>\n",
       "      <td>UT</td>\n",
       "      <td>7/10/1951 23:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>Greenville</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>TRIANGLE</td>\n",
       "      <td>TX</td>\n",
       "      <td>4/15/1952 16:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>Norfolk</td>\n",
       "      <td>RED</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>VA</td>\n",
       "      <td>6/1/1952 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>202</th>\n",
       "      <td>Arlington</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>VA</td>\n",
       "      <td>7/13/1952 21:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226</th>\n",
       "      <td>Cambridge</td>\n",
       "      <td>RED</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>MA</td>\n",
       "      <td>4/1/1953 18:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td>Midwest City</td>\n",
       "      <td>YELLOW</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>OK</td>\n",
       "      <td>4/15/1953 16:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>238</th>\n",
       "      <td>Cleveland</td>\n",
       "      <td>RED</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>OH</td>\n",
       "      <td>6/30/1953 0:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>249</th>\n",
       "      <td>Artesia</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>NM</td>\n",
       "      <td>8/15/1953 19:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>256</th>\n",
       "      <td>Pendleton</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>DISK</td>\n",
       "      <td>IN</td>\n",
       "      <td>11/21/1953 22:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>288</th>\n",
       "      <td>St. Louis Airport</td>\n",
       "      <td>RED</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>MO</td>\n",
       "      <td>7/1/1954 21:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>289</th>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>RED</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>CA</td>\n",
       "      <td>7/1/1954 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>304</th>\n",
       "      <td>Beaumont</td>\n",
       "      <td>RED</td>\n",
       "      <td>DISK</td>\n",
       "      <td>TX</td>\n",
       "      <td>9/9/1954 12:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>311</th>\n",
       "      <td>Red Bank</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>NJ</td>\n",
       "      <td>12/15/1954 23:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <td>Holbrook</td>\n",
       "      <td>YELLOW</td>\n",
       "      <td>EGG</td>\n",
       "      <td>MA</td>\n",
       "      <td>5/1/1955 15:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>323</th>\n",
       "      <td>Terre Haute</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>IN</td>\n",
       "      <td>6/15/1955 0:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>354</th>\n",
       "      <td>Memphis</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CYLINDER</td>\n",
       "      <td>TN</td>\n",
       "      <td>6/1/1956 20:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>363</th>\n",
       "      <td>Vista</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIGAR</td>\n",
       "      <td>CA</td>\n",
       "      <td>6/15/1956 19:15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18110</th>\n",
       "      <td>Gilbert</td>\n",
       "      <td>RED</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>AZ</td>\n",
       "      <td>12/11/2000 23:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18115</th>\n",
       "      <td>Madison</td>\n",
       "      <td>RED GREEN</td>\n",
       "      <td>TEARDROP</td>\n",
       "      <td>OH</td>\n",
       "      <td>12/12/2000 18:45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18117</th>\n",
       "      <td>Concord</td>\n",
       "      <td>RED GREEN</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>NH</td>\n",
       "      <td>12/12/2000 23:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18121</th>\n",
       "      <td>Redwood Valley</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/15/2000 1:09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18123</th>\n",
       "      <td>Waldorf</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>MD</td>\n",
       "      <td>12/15/2000 5:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18127</th>\n",
       "      <td>North Pole</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>SPHERE</td>\n",
       "      <td>AK</td>\n",
       "      <td>12/15/2000 16:36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18129</th>\n",
       "      <td>Beaver</td>\n",
       "      <td>RED</td>\n",
       "      <td>DISK</td>\n",
       "      <td>PA</td>\n",
       "      <td>12/15/2000 18:25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18134</th>\n",
       "      <td>Graham</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>WA</td>\n",
       "      <td>12/16/2000 0:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18141</th>\n",
       "      <td>Sebastopol</td>\n",
       "      <td>YELLOW</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/17/2000 18:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18144</th>\n",
       "      <td>Medina</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>TRIANGLE</td>\n",
       "      <td>OH</td>\n",
       "      <td>12/17/2000 19:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18148</th>\n",
       "      <td>Highland Park</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>VARIOUS</td>\n",
       "      <td>NJ</td>\n",
       "      <td>12/18/2000 2:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18158</th>\n",
       "      <td>Woodland</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/19/2000 23:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18167</th>\n",
       "      <td>Gilbert</td>\n",
       "      <td>RED</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>AZ</td>\n",
       "      <td>12/21/2000 19:05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18170</th>\n",
       "      <td>MM 110</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>AZ</td>\n",
       "      <td>12/22/2000 3:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18172</th>\n",
       "      <td>Toomsuba</td>\n",
       "      <td>RED</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>MS</td>\n",
       "      <td>12/23/2000 4:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18177</th>\n",
       "      <td>Chandler</td>\n",
       "      <td>RED</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>AZ</td>\n",
       "      <td>12/23/2000 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18181</th>\n",
       "      <td>Fortuna</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>CIRCLE</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/24/2000 18:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18184</th>\n",
       "      <td>Plymouth</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>OH</td>\n",
       "      <td>12/24/2000 22:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18191</th>\n",
       "      <td>Fallston</td>\n",
       "      <td>RED</td>\n",
       "      <td>VARIOUS</td>\n",
       "      <td>MD</td>\n",
       "      <td>12/25/2000 19:15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18192</th>\n",
       "      <td>Atlanta</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>GA</td>\n",
       "      <td>12/25/2000 20:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18194</th>\n",
       "      <td>Walpole</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>NH</td>\n",
       "      <td>12/26/2000 18:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18195</th>\n",
       "      <td>Walpole</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>NH</td>\n",
       "      <td>12/26/2000 18:20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18196</th>\n",
       "      <td>Brocton</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>OVAL</td>\n",
       "      <td>MA</td>\n",
       "      <td>12/26/2000 18:23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18197</th>\n",
       "      <td>Greenfiled</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>MA</td>\n",
       "      <td>12/26/2000 18:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18210</th>\n",
       "      <td>Monument Valley</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>OTHER</td>\n",
       "      <td>UT</td>\n",
       "      <td>12/28/2000 17:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18213</th>\n",
       "      <td>Pasadena</td>\n",
       "      <td>GREEN</td>\n",
       "      <td>FIREBALL</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/28/2000 19:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18216</th>\n",
       "      <td>Garden Grove</td>\n",
       "      <td>ORANGE</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/29/2000 16:10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18220</th>\n",
       "      <td>Shasta Lake</td>\n",
       "      <td>BLUE</td>\n",
       "      <td>DISK</td>\n",
       "      <td>CA</td>\n",
       "      <td>12/29/2000 20:30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18233</th>\n",
       "      <td>Anchorage</td>\n",
       "      <td>RED</td>\n",
       "      <td>VARIOUS</td>\n",
       "      <td>AK</td>\n",
       "      <td>12/31/2000 21:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18239</th>\n",
       "      <td>Eagle River</td>\n",
       "      <td>RED</td>\n",
       "      <td>LIGHT</td>\n",
       "      <td>WI</td>\n",
       "      <td>12/31/2000 23:45</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2486 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    City Colors Reported Shape Reported State  \\\n",
       "12                Belton             RED         SPHERE    SC   \n",
       "19            Bering Sea             RED          OTHER    AK   \n",
       "36            Portsmouth             RED      FORMATION    VA   \n",
       "44             Blairsden           GREEN         SPHERE    CA   \n",
       "82              San Jose            BLUE        CHEVRON    CA   \n",
       "84               Modesto            BLUE           DISK    CA   \n",
       "91                Scipio             RED         SPHERE    IN   \n",
       "111         Tarrant City          ORANGE         CIRCLE    AL   \n",
       "129                 Napa           GREEN           DISK    CA   \n",
       "138        Coeur d'Alene          ORANGE          CIGAR    ID   \n",
       "152               Irving            BLUE           DISK    KS   \n",
       "157           Greenville           GREEN           DISK    MS   \n",
       "163          Green River           GREEN         SPHERE    WY   \n",
       "164                Provo            BLUE           DISK    UT   \n",
       "174           Greenville          ORANGE       TRIANGLE    TX   \n",
       "178              Norfolk             RED       FIREBALL    VA   \n",
       "202            Arlington           GREEN           OVAL    VA   \n",
       "226            Cambridge             RED         SPHERE    MA   \n",
       "229         Midwest City          YELLOW       FIREBALL    OK   \n",
       "238            Cleveland             RED       FIREBALL    OH   \n",
       "249              Artesia          ORANGE          OTHER    NM   \n",
       "256            Pendleton           GREEN           DISK    IN   \n",
       "288    St. Louis Airport             RED           OVAL    MO   \n",
       "289          Los Angeles             RED         CIRCLE    CA   \n",
       "304             Beaumont             RED           DISK    TX   \n",
       "311             Red Bank          ORANGE         CIRCLE    NJ   \n",
       "314             Holbrook          YELLOW            EGG    MA   \n",
       "323          Terre Haute          ORANGE         CIRCLE    IN   \n",
       "354              Memphis          ORANGE       CYLINDER    TN   \n",
       "363                Vista          ORANGE          CIGAR    CA   \n",
       "...                  ...             ...            ...   ...   \n",
       "18110            Gilbert             RED          LIGHT    AZ   \n",
       "18115            Madison       RED GREEN       TEARDROP    OH   \n",
       "18117            Concord       RED GREEN         CIRCLE    NH   \n",
       "18121     Redwood Valley            BLUE          LIGHT    CA   \n",
       "18123            Waldorf           GREEN          LIGHT    MD   \n",
       "18127         North Pole          ORANGE         SPHERE    AK   \n",
       "18129             Beaver             RED           DISK    PA   \n",
       "18134             Graham           GREEN          LIGHT    WA   \n",
       "18141         Sebastopol          YELLOW         CIRCLE    CA   \n",
       "18144             Medina          ORANGE       TRIANGLE    OH   \n",
       "18148      Highland Park            BLUE        VARIOUS    NJ   \n",
       "18158           Woodland          ORANGE          LIGHT    CA   \n",
       "18167            Gilbert             RED         CIRCLE    AZ   \n",
       "18170             MM 110            BLUE         CIRCLE    AZ   \n",
       "18172           Toomsuba             RED           OVAL    MS   \n",
       "18177           Chandler             RED          LIGHT    AZ   \n",
       "18181            Fortuna          ORANGE         CIRCLE    CA   \n",
       "18184           Plymouth           GREEN       FIREBALL    OH   \n",
       "18191           Fallston             RED        VARIOUS    MD   \n",
       "18192            Atlanta          ORANGE          LIGHT    GA   \n",
       "18194            Walpole           GREEN       FIREBALL    NH   \n",
       "18195            Walpole           GREEN       FIREBALL    NH   \n",
       "18196            Brocton           GREEN           OVAL    MA   \n",
       "18197         Greenfiled           GREEN       FIREBALL    MA   \n",
       "18210    Monument Valley            BLUE          OTHER    UT   \n",
       "18213           Pasadena           GREEN       FIREBALL    CA   \n",
       "18216       Garden Grove          ORANGE          LIGHT    CA   \n",
       "18220        Shasta Lake            BLUE           DISK    CA   \n",
       "18233          Anchorage             RED        VARIOUS    AK   \n",
       "18239        Eagle River             RED          LIGHT    WI   \n",
       "\n",
       "                   Time  \n",
       "12      6/30/1939 20:00  \n",
       "19      4/30/1943 23:00  \n",
       "36       7/10/1945 1:30  \n",
       "44      6/30/1946 19:00  \n",
       "82      7/15/1947 21:00  \n",
       "84       8/8/1947 22:00  \n",
       "91      5/10/1948 19:00  \n",
       "111     8/15/1949 22:00  \n",
       "129      6/10/1950 0:00  \n",
       "138      7/2/1950 13:00  \n",
       "152      4/15/1951 0:30  \n",
       "157     6/15/1951 20:30  \n",
       "163      7/3/1951 12:00  \n",
       "164     7/10/1951 23:30  \n",
       "174     4/15/1952 16:00  \n",
       "178      6/1/1952 22:00  \n",
       "202     7/13/1952 21:00  \n",
       "226      4/1/1953 18:00  \n",
       "229     4/15/1953 16:00  \n",
       "238      6/30/1953 0:00  \n",
       "249     8/15/1953 19:00  \n",
       "256    11/21/1953 22:30  \n",
       "288      7/1/1954 21:00  \n",
       "289      7/1/1954 22:00  \n",
       "304      9/9/1954 12:30  \n",
       "311    12/15/1954 23:10  \n",
       "314      5/1/1955 15:00  \n",
       "323      6/15/1955 0:00  \n",
       "354      6/1/1956 20:00  \n",
       "363     6/15/1956 19:15  \n",
       "...                 ...  \n",
       "18110  12/11/2000 23:45  \n",
       "18115  12/12/2000 18:45  \n",
       "18117  12/12/2000 23:10  \n",
       "18121   12/15/2000 1:09  \n",
       "18123   12/15/2000 5:30  \n",
       "18127  12/15/2000 16:36  \n",
       "18129  12/15/2000 18:25  \n",
       "18134   12/16/2000 0:20  \n",
       "18141  12/17/2000 18:00  \n",
       "18144  12/17/2000 19:30  \n",
       "18148   12/18/2000 2:30  \n",
       "18158  12/19/2000 23:30  \n",
       "18167  12/21/2000 19:05  \n",
       "18170   12/22/2000 3:00  \n",
       "18172   12/23/2000 4:00  \n",
       "18177  12/23/2000 22:00  \n",
       "18181  12/24/2000 18:00  \n",
       "18184  12/24/2000 22:00  \n",
       "18191  12/25/2000 19:15  \n",
       "18192  12/25/2000 20:30  \n",
       "18194  12/26/2000 18:20  \n",
       "18195  12/26/2000 18:20  \n",
       "18196  12/26/2000 18:23  \n",
       "18197  12/26/2000 18:30  \n",
       "18210  12/28/2000 17:51  \n",
       "18213  12/28/2000 19:10  \n",
       "18216  12/29/2000 16:10  \n",
       "18220  12/29/2000 20:30  \n",
       "18233  12/31/2000 21:00  \n",
       "18239  12/31/2000 23:45  \n",
       "\n",
       "[2486 rows x 5 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Handling Missing Values\n",
    "## Drop Missing Values\n",
    "\n",
    "# dropna() method drops al those rows from the dataset where anyof the columns holds a NaN values\n",
    "df.dropna(how='any')\n",
    "\n",
    "# (how='all') - drop only if all the values in the row are missing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2877, 5)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# drop a row only if any of the City, Colors Reported columns have NaN value\n",
    "df.dropna(subset=['City', 'Colors Reported'], how='any').shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seattle          187\n",
      "New York City    161\n",
      "Name: City, dtype: int64\n",
      "\n",
      "25\n",
      "\n",
      "Seattle          212\n",
      "New York City    161\n",
      "Name: City, dtype: int64\n",
      "\n",
      "\n",
      "187+25 = 212\n"
     ]
    }
   ],
   "source": [
    "print df.City.value_counts().head(2)\n",
    "print \n",
    "print df.City.isnull().sum()\n",
    "print \n",
    "print df.City.fillna(value='Seattle', inplace=False).value_counts().head(2)\n",
    "\n",
    "print \"\\n\\n187+25 = 212\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Takeaways\n",
    "\n",
    "1. For any value that is missing in the dataset. Pandas makes it NaN bydefault\n",
    "2. Missing values are excluded while making a count using value_counts()\n",
    "\n",
    "# -----------------------\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}