{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "\n",
    "from datetime import datetime, date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Loading the Customer Address Data from the excel file\n",
    "\n",
    "cust_address = pd.read_excel('Raw_data.xlsx' , sheet_name='CustomerAddress')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>address</th>\n",
       "      <th>postcode</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>property_valuation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>060 Morning Avenue</td>\n",
       "      <td>2016</td>\n",
       "      <td>New South Wales</td>\n",
       "      <td>Australia</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>6 Meadow Vale Court</td>\n",
       "      <td>2153</td>\n",
       "      <td>New South Wales</td>\n",
       "      <td>Australia</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>0 Holy Cross Court</td>\n",
       "      <td>4211</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>17979 Del Mar Point</td>\n",
       "      <td>2448</td>\n",
       "      <td>New South Wales</td>\n",
       "      <td>Australia</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>9 Oakridge Court</td>\n",
       "      <td>3216</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id              address  postcode            state    country  \\\n",
       "0            1   060 Morning Avenue      2016  New South Wales  Australia   \n",
       "1            2  6 Meadow Vale Court      2153  New South Wales  Australia   \n",
       "2            4   0 Holy Cross Court      4211              QLD  Australia   \n",
       "3            5  17979 Del Mar Point      2448  New South Wales  Australia   \n",
       "4            6     9 Oakridge Court      3216              VIC  Australia   \n",
       "\n",
       "   property_valuation  \n",
       "0                  10  \n",
       "1                  10  \n",
       "2                   9  \n",
       "3                   4  \n",
       "4                   9  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Checking first 5 records from Customer Address Data\n",
    "\n",
    "cust_address.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 3999 entries, 0 to 3998\n",
      "Data columns (total 6 columns):\n",
      "customer_id           3999 non-null int64\n",
      "address               3999 non-null object\n",
      "postcode              3999 non-null int64\n",
      "state                 3999 non-null object\n",
      "country               3999 non-null object\n",
      "property_valuation    3999 non-null int64\n",
      "dtypes: int64(3), object(3)\n",
      "memory usage: 187.5+ KB\n"
     ]
    }
   ],
   "source": [
    "# Information of columns and data-types of Customer Address Data.\n",
    "\n",
    "cust_address.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The data-type of columns looks fine. Let;s check for the data quality and apply data cleaning process where ever applicable to clean our dataset before performing any analysis."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Total Records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total records (rows) in the dataset : 3999\n",
      "Total columns (features) in the dataset : 6\n"
     ]
    }
   ],
   "source": [
    "print(\"Total records (rows) in the dataset : {}\".format(cust_address.shape[0]))\n",
    "print(\"Total columns (features) in the dataset : {}\".format(cust_address.shape[1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Numeric Columns and Non-Numeric Columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The numeric columns are : ['customer_id' 'postcode' 'property_valuation']\n",
      "The non-numeric columns are  : ['address' 'state' 'country']\n"
     ]
    }
   ],
   "source": [
    "# select numeric columns\n",
    "df_numeric = cust_address.select_dtypes(include=[np.number])\n",
    "numeric_cols = df_numeric.columns.values\n",
    "print(\"The numeric columns are : {}\".format(numeric_cols))\n",
    "\n",
    "\n",
    "# select non-numeric columns\n",
    "df_non_numeric = cust_address.select_dtypes(exclude=[np.number])\n",
    "non_numeric_cols = df_non_numeric.columns.values\n",
    "print(\"The non-numeric columns are  : {}\".format(non_numeric_cols))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Missing Values Check"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Checking for the presence of any missing values in the dataset. If missing values are present for a particular feature then depending upon the situation the feature may be either dropped (cases when a major amount of data is missing) or an appropiate value will be imputed in the feature column with missing values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "customer_id           0\n",
       "address               0\n",
       "postcode              0\n",
       "state                 0\n",
       "country               0\n",
       "property_valuation    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Total number of missing values\n",
    "cust_address.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>In the dataset there are no missing values.</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Inconsistency Check in Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will check whether there is inconsistent data / typo error data is present in the categorical columns.<br>\n",
    "The columns to be checked are <b>'address', 'postcode' ,'state', 'country'</b>."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 State"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NSW                2054\n",
       "VIC                 939\n",
       "QLD                 838\n",
       "New South Wales      86\n",
       "Victoria             82\n",
       "Name: state, dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_address['state'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here there are inconsistent data in State column. For New South Wales and Victoria we have two values, one being the full name and the other being their short name. The State names should be standardised and  columns with state as <b>New South Wales will be replaced by NSW</b> and columns with state as <b>Victoria will be replaced by VIC</b>."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to replace full state names with their short forms.\n",
    "\n",
    "def replace_state_names(state_name):\n",
    "    \n",
    "    # Making Short Form of State Names as standards\n",
    "    if state_name=='New South Wales':\n",
    "        return 'NSW'\n",
    "    elif state_name=='Victoria':\n",
    "        return 'VIC'\n",
    "    else :\n",
    "        return state_name\n",
    "\n",
    "# Applying the above fuction to state column\n",
    "cust_address['state'] = cust_address['state'].apply(replace_state_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After applying the above function the state name is standardised and there is no inconsistency in the state column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NSW    2140\n",
       "VIC    1021\n",
       "QLD     838\n",
       "Name: state, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_address['state'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 Country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Australia    3999\n",
       "Name: country, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_address['country'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There is no inconsistency of data in the Country column."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3 Postcode"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The Postcode column looks perfect. There is no inconsistency / typo in the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address</th>\n",
       "      <th>postcode</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>060 Morning Avenue</td>\n",
       "      <td>2016</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6 Meadow Vale Court</td>\n",
       "      <td>2153</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0 Holy Cross Court</td>\n",
       "      <td>4211</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>17979 Del Mar Point</td>\n",
       "      <td>2448</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9 Oakridge Court</td>\n",
       "      <td>3216</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>4 Delaware Trail</td>\n",
       "      <td>2210</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>49 Londonderry Lane</td>\n",
       "      <td>2650</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>97736 7th Trail</td>\n",
       "      <td>2023</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>93405 Ludington Park</td>\n",
       "      <td>3044</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>44339 Golden Leaf Alley</td>\n",
       "      <td>4557</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2 Sutherland Street</td>\n",
       "      <td>3799</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>9 Mcbride Trail</td>\n",
       "      <td>2760</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>9861 New Castle Avenue</td>\n",
       "      <td>2428</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>52 Moland Street</td>\n",
       "      <td>3331</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>82391 Kensington Lane</td>\n",
       "      <td>3058</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>092 2nd Alley</td>\n",
       "      <td>2135</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>59 Spaight Circle</td>\n",
       "      <td>2233</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>032 Bartelt Crossing</td>\n",
       "      <td>2444</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18 Jenna Center</td>\n",
       "      <td>2650</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>3 Cordelia Plaza</td>\n",
       "      <td>2153</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>28 5th Center</td>\n",
       "      <td>4413</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>52 Carey Alley</td>\n",
       "      <td>4740</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>96 Texas Plaza</td>\n",
       "      <td>3218</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>48 Eagan Avenue</td>\n",
       "      <td>4868</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>9 Buell Park</td>\n",
       "      <td>4116</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>02663 Buell Parkway</td>\n",
       "      <td>2519</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>2294 Pleasure Place</td>\n",
       "      <td>2135</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>2951 Petterle Place</td>\n",
       "      <td>2756</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>63 Lukken Drive</td>\n",
       "      <td>2170</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>833 Luster Way</td>\n",
       "      <td>4005</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3969</th>\n",
       "      <td>81609 Vernon Terrace</td>\n",
       "      <td>3934</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3970</th>\n",
       "      <td>37 Hintze Court</td>\n",
       "      <td>2168</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3971</th>\n",
       "      <td>07 Morning Court</td>\n",
       "      <td>3805</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3972</th>\n",
       "      <td>327 Loeprich Street</td>\n",
       "      <td>3187</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3973</th>\n",
       "      <td>5106 Northridge Drive</td>\n",
       "      <td>2560</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3974</th>\n",
       "      <td>9754 High Crossing Terrace</td>\n",
       "      <td>2148</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3975</th>\n",
       "      <td>7 Meadow Vale Court</td>\n",
       "      <td>2114</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3976</th>\n",
       "      <td>80260 Morning Road</td>\n",
       "      <td>3178</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3977</th>\n",
       "      <td>38017 Briar Crest Drive</td>\n",
       "      <td>2165</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3978</th>\n",
       "      <td>60 Morningstar Center</td>\n",
       "      <td>2126</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3979</th>\n",
       "      <td>218 Stuart Junction</td>\n",
       "      <td>2223</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3980</th>\n",
       "      <td>9 Butterfield Lane</td>\n",
       "      <td>4077</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3981</th>\n",
       "      <td>614 Burning Wood Way</td>\n",
       "      <td>2148</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3982</th>\n",
       "      <td>9 Grover Point</td>\n",
       "      <td>4218</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3983</th>\n",
       "      <td>565 Bunting Park</td>\n",
       "      <td>2076</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3984</th>\n",
       "      <td>9461 Saint Paul Trail</td>\n",
       "      <td>2428</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3985</th>\n",
       "      <td>5204 Delaware Pass</td>\n",
       "      <td>2560</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3986</th>\n",
       "      <td>24 Scott Pass</td>\n",
       "      <td>4300</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3987</th>\n",
       "      <td>8 Randy Parkway</td>\n",
       "      <td>2209</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3988</th>\n",
       "      <td>681 Elmside Place</td>\n",
       "      <td>3750</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3989</th>\n",
       "      <td>2918 Summer Ridge Hill</td>\n",
       "      <td>3030</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3990</th>\n",
       "      <td>613 Erie Lane</td>\n",
       "      <td>2088</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3991</th>\n",
       "      <td>0 Transport Center</td>\n",
       "      <td>3977</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3992</th>\n",
       "      <td>4 Dovetail Crossing</td>\n",
       "      <td>2350</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3993</th>\n",
       "      <td>736 Roxbury Junction</td>\n",
       "      <td>2540</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3994</th>\n",
       "      <td>1482 Hauk Trail</td>\n",
       "      <td>3064</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3995</th>\n",
       "      <td>57042 Village Green Point</td>\n",
       "      <td>4511</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3996</th>\n",
       "      <td>87 Crescent Oaks Alley</td>\n",
       "      <td>2756</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3997</th>\n",
       "      <td>8194 Lien Street</td>\n",
       "      <td>4032</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3998</th>\n",
       "      <td>320 Acker Drive</td>\n",
       "      <td>2251</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3999 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         address  postcode state    country\n",
       "0             060 Morning Avenue      2016   NSW  Australia\n",
       "1            6 Meadow Vale Court      2153   NSW  Australia\n",
       "2             0 Holy Cross Court      4211   QLD  Australia\n",
       "3            17979 Del Mar Point      2448   NSW  Australia\n",
       "4               9 Oakridge Court      3216   VIC  Australia\n",
       "5               4 Delaware Trail      2210   NSW  Australia\n",
       "6            49 Londonderry Lane      2650   NSW  Australia\n",
       "7                97736 7th Trail      2023   NSW  Australia\n",
       "8           93405 Ludington Park      3044   VIC  Australia\n",
       "9        44339 Golden Leaf Alley      4557   QLD  Australia\n",
       "10           2 Sutherland Street      3799   VIC  Australia\n",
       "11               9 Mcbride Trail      2760   NSW  Australia\n",
       "12        9861 New Castle Avenue      2428   NSW  Australia\n",
       "13              52 Moland Street      3331   VIC  Australia\n",
       "14         82391 Kensington Lane      3058   VIC  Australia\n",
       "15                 092 2nd Alley      2135   NSW  Australia\n",
       "16             59 Spaight Circle      2233   NSW  Australia\n",
       "17          032 Bartelt Crossing      2444   NSW  Australia\n",
       "18               18 Jenna Center      2650   NSW  Australia\n",
       "19              3 Cordelia Plaza      2153   NSW  Australia\n",
       "20                 28 5th Center      4413   QLD  Australia\n",
       "21                52 Carey Alley      4740   QLD  Australia\n",
       "22                96 Texas Plaza      3218   VIC  Australia\n",
       "23               48 Eagan Avenue      4868   QLD  Australia\n",
       "24                  9 Buell Park      4116   QLD  Australia\n",
       "25           02663 Buell Parkway      2519   NSW  Australia\n",
       "26           2294 Pleasure Place      2135   NSW  Australia\n",
       "27           2951 Petterle Place      2756   NSW  Australia\n",
       "28               63 Lukken Drive      2170   NSW  Australia\n",
       "29                833 Luster Way      4005   QLD  Australia\n",
       "...                          ...       ...   ...        ...\n",
       "3969        81609 Vernon Terrace      3934   VIC  Australia\n",
       "3970             37 Hintze Court      2168   NSW  Australia\n",
       "3971            07 Morning Court      3805   VIC  Australia\n",
       "3972         327 Loeprich Street      3187   VIC  Australia\n",
       "3973       5106 Northridge Drive      2560   NSW  Australia\n",
       "3974  9754 High Crossing Terrace      2148   NSW  Australia\n",
       "3975         7 Meadow Vale Court      2114   NSW  Australia\n",
       "3976          80260 Morning Road      3178   VIC  Australia\n",
       "3977     38017 Briar Crest Drive      2165   NSW  Australia\n",
       "3978       60 Morningstar Center      2126   NSW  Australia\n",
       "3979         218 Stuart Junction      2223   NSW  Australia\n",
       "3980          9 Butterfield Lane      4077   QLD  Australia\n",
       "3981        614 Burning Wood Way      2148   NSW  Australia\n",
       "3982              9 Grover Point      4218   QLD  Australia\n",
       "3983            565 Bunting Park      2076   NSW  Australia\n",
       "3984       9461 Saint Paul Trail      2428   NSW  Australia\n",
       "3985          5204 Delaware Pass      2560   NSW  Australia\n",
       "3986               24 Scott Pass      4300   QLD  Australia\n",
       "3987             8 Randy Parkway      2209   NSW  Australia\n",
       "3988           681 Elmside Place      3750   VIC  Australia\n",
       "3989      2918 Summer Ridge Hill      3030   VIC  Australia\n",
       "3990               613 Erie Lane      2088   NSW  Australia\n",
       "3991          0 Transport Center      3977   VIC  Australia\n",
       "3992         4 Dovetail Crossing      2350   NSW  Australia\n",
       "3993        736 Roxbury Junction      2540   NSW  Australia\n",
       "3994             1482 Hauk Trail      3064   VIC  Australia\n",
       "3995   57042 Village Green Point      4511   QLD  Australia\n",
       "3996      87 Crescent Oaks Alley      2756   NSW  Australia\n",
       "3997            8194 Lien Street      4032   QLD  Australia\n",
       "3998             320 Acker Drive      2251   NSW  Australia\n",
       "\n",
       "[3999 rows x 4 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_address[['address','postcode', 'state' , 'country']].drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Duplication Checks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We need to ensure that there is no duplication of records in the dataset. This may lead to error in data analysis due to poor data quality. If there are duplicate rows of data then we need to drop such records.<br>For checking for duplicate records we need to firstly remove the primary key column of the dataset then apply drop_duplicates() function provided by Python."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of records after removing customer_id (pk), duplicates : 3999\n",
      "Number of records in original dataset : 3999\n"
     ]
    }
   ],
   "source": [
    "# Dropping the primary key column i.e customer_id and storing into a temporary dataframe.\n",
    "cust_address_dedupped = cust_address.drop('customer_id', axis=1).drop_duplicates()\n",
    "\n",
    "print(\"Number of records after removing customer_id (pk), duplicates : {}\".format(cust_address_dedupped.shape[0]))\n",
    "print(\"Number of records in original dataset : {}\".format(cust_address.shape[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Since both the numbers are same. There are no duplicate records in the dataset</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Exporting the Cleaned Customer Demographic Data Set to csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Currently the Customer Address dataset is clean. Hence we can export the data to a csv to continue our data analysis of Customer Segments by joining it to other tables."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "cust_address.to_csv('CustomerAddress_Cleaned.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Checking for Master-Detail Record Counts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Checking with the Master Table (CustomerDemographic_Cleaned.csv) containing the entire Customer Data for the Customer IDs which are getting dropped from the Customer Address Dataset.<br> \n",
    "<b>Basically these are the Customers who have an address but are not a part of the Demographics dataset yet.</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "cust_demo_detail = pd.read_csv('CustomerDemographic_Cleaned.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>gender</th>\n",
       "      <th>past_3_years_bike_related_purchases</th>\n",
       "      <th>DOB</th>\n",
       "      <th>job_title</th>\n",
       "      <th>job_industry_category</th>\n",
       "      <th>wealth_segment</th>\n",
       "      <th>deceased_indicator</th>\n",
       "      <th>owns_car</th>\n",
       "      <th>tenure</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Laraine</td>\n",
       "      <td>Medendorp</td>\n",
       "      <td>Female</td>\n",
       "      <td>93</td>\n",
       "      <td>1953-10-12</td>\n",
       "      <td>Executive Secretary</td>\n",
       "      <td>Health</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11.0</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Eli</td>\n",
       "      <td>Bockman</td>\n",
       "      <td>Male</td>\n",
       "      <td>81</td>\n",
       "      <td>1980-12-16</td>\n",
       "      <td>Administrative Officer</td>\n",
       "      <td>Financial Services</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>16.0</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Arlin</td>\n",
       "      <td>Dearle</td>\n",
       "      <td>Male</td>\n",
       "      <td>61</td>\n",
       "      <td>1954-01-20</td>\n",
       "      <td>Recruiting Manager</td>\n",
       "      <td>Property</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>15.0</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Talbot</td>\n",
       "      <td>None</td>\n",
       "      <td>Male</td>\n",
       "      <td>33</td>\n",
       "      <td>1961-10-03</td>\n",
       "      <td>Missing</td>\n",
       "      <td>IT</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>No</td>\n",
       "      <td>7.0</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Sheila-kathryn</td>\n",
       "      <td>Calton</td>\n",
       "      <td>Female</td>\n",
       "      <td>56</td>\n",
       "      <td>1977-05-13</td>\n",
       "      <td>Senior Editor</td>\n",
       "      <td>Missing</td>\n",
       "      <td>Affluent Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>8.0</td>\n",
       "      <td>43</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id      first_name  last_name  gender  \\\n",
       "0            1         Laraine  Medendorp  Female   \n",
       "1            2             Eli    Bockman    Male   \n",
       "2            3           Arlin     Dearle    Male   \n",
       "3            4          Talbot       None    Male   \n",
       "4            5  Sheila-kathryn     Calton  Female   \n",
       "\n",
       "   past_3_years_bike_related_purchases         DOB               job_title  \\\n",
       "0                                   93  1953-10-12     Executive Secretary   \n",
       "1                                   81  1980-12-16  Administrative Officer   \n",
       "2                                   61  1954-01-20      Recruiting Manager   \n",
       "3                                   33  1961-10-03                 Missing   \n",
       "4                                   56  1977-05-13           Senior Editor   \n",
       "\n",
       "  job_industry_category     wealth_segment deceased_indicator owns_car  \\\n",
       "0                Health      Mass Customer                  N      Yes   \n",
       "1    Financial Services      Mass Customer                  N      Yes   \n",
       "2              Property      Mass Customer                  N      Yes   \n",
       "3                    IT      Mass Customer                  N       No   \n",
       "4               Missing  Affluent Customer                  N      Yes   \n",
       "\n",
       "   tenure  Age  \n",
       "0    11.0   67  \n",
       "1    16.0   40  \n",
       "2    15.0   67  \n",
       "3     7.0   59  \n",
       "4     8.0   43  "
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_demo_detail.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total Records in Customer_Demographic_Table : 3912\n",
      "Total Records in Customer_Address_Table : 3999\n",
      "In Demographic Table 87 records are getting dropped due to data cleaning process in Demographic Table\n"
     ]
    }
   ],
   "source": [
    "print(\"Total Records in Customer_Demographic_Table : {}\".format(cust_demo_detail.shape[0]))\n",
    "print(\"Total Records in Customer_Address_Table : {}\".format(cust_address.shape[0]))\n",
    "print('In Demographic Table {} records are getting dropped due to data cleaning process in Demographic Table'\n",
    "      .format(cust_address.shape[0]-cust_demo_detail.shape[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Customer IDs in Address table getting dropped :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>address</th>\n",
       "      <th>postcode</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>property_valuation</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>gender</th>\n",
       "      <th>past_3_years_bike_related_purchases</th>\n",
       "      <th>DOB</th>\n",
       "      <th>job_title</th>\n",
       "      <th>job_industry_category</th>\n",
       "      <th>wealth_segment</th>\n",
       "      <th>deceased_indicator</th>\n",
       "      <th>owns_car</th>\n",
       "      <th>tenure</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>060 Morning Avenue</td>\n",
       "      <td>2016.0</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Laraine</td>\n",
       "      <td>Medendorp</td>\n",
       "      <td>Female</td>\n",
       "      <td>93.0</td>\n",
       "      <td>1953-10-12</td>\n",
       "      <td>Executive Secretary</td>\n",
       "      <td>Health</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11.0</td>\n",
       "      <td>67.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>6 Meadow Vale Court</td>\n",
       "      <td>2153.0</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Eli</td>\n",
       "      <td>Bockman</td>\n",
       "      <td>Male</td>\n",
       "      <td>81.0</td>\n",
       "      <td>1980-12-16</td>\n",
       "      <td>Administrative Officer</td>\n",
       "      <td>Financial Services</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>16.0</td>\n",
       "      <td>40.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>0 Holy Cross Court</td>\n",
       "      <td>4211.0</td>\n",
       "      <td>QLD</td>\n",
       "      <td>Australia</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Talbot</td>\n",
       "      <td>None</td>\n",
       "      <td>Male</td>\n",
       "      <td>33.0</td>\n",
       "      <td>1961-10-03</td>\n",
       "      <td>Missing</td>\n",
       "      <td>IT</td>\n",
       "      <td>Mass Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>No</td>\n",
       "      <td>7.0</td>\n",
       "      <td>59.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>17979 Del Mar Point</td>\n",
       "      <td>2448.0</td>\n",
       "      <td>NSW</td>\n",
       "      <td>Australia</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Sheila-kathryn</td>\n",
       "      <td>Calton</td>\n",
       "      <td>Female</td>\n",
       "      <td>56.0</td>\n",
       "      <td>1977-05-13</td>\n",
       "      <td>Senior Editor</td>\n",
       "      <td>Missing</td>\n",
       "      <td>Affluent Customer</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>8.0</td>\n",
       "      <td>43.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>9 Oakridge Court</td>\n",
       "      <td>3216.0</td>\n",
       "      <td>VIC</td>\n",
       "      <td>Australia</td>\n",
       "      <td>9.0</td>\n",
       "      <td>Curr</td>\n",
       "      <td>Duckhouse</td>\n",
       "      <td>Male</td>\n",
       "      <td>35.0</td>\n",
       "      <td>1966-09-16</td>\n",
       "      <td>Missing</td>\n",
       "      <td>Retail</td>\n",
       "      <td>High Net Worth</td>\n",
       "      <td>N</td>\n",
       "      <td>Yes</td>\n",
       "      <td>13.0</td>\n",
       "      <td>54.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   customer_id              address  postcode state    country  \\\n",
       "0            1   060 Morning Avenue    2016.0   NSW  Australia   \n",
       "1            2  6 Meadow Vale Court    2153.0   NSW  Australia   \n",
       "2            4   0 Holy Cross Court    4211.0   QLD  Australia   \n",
       "3            5  17979 Del Mar Point    2448.0   NSW  Australia   \n",
       "4            6     9 Oakridge Court    3216.0   VIC  Australia   \n",
       "\n",
       "   property_valuation      first_name  last_name  gender  \\\n",
       "0                10.0         Laraine  Medendorp  Female   \n",
       "1                10.0             Eli    Bockman    Male   \n",
       "2                 9.0          Talbot       None    Male   \n",
       "3                 4.0  Sheila-kathryn     Calton  Female   \n",
       "4                 9.0            Curr  Duckhouse    Male   \n",
       "\n",
       "   past_3_years_bike_related_purchases         DOB               job_title  \\\n",
       "0                                 93.0  1953-10-12     Executive Secretary   \n",
       "1                                 81.0  1980-12-16  Administrative Officer   \n",
       "2                                 33.0  1961-10-03                 Missing   \n",
       "3                                 56.0  1977-05-13           Senior Editor   \n",
       "4                                 35.0  1966-09-16                 Missing   \n",
       "\n",
       "  job_industry_category     wealth_segment deceased_indicator owns_car  \\\n",
       "0                Health      Mass Customer                  N      Yes   \n",
       "1    Financial Services      Mass Customer                  N      Yes   \n",
       "2                    IT      Mass Customer                  N       No   \n",
       "3               Missing  Affluent Customer                  N      Yes   \n",
       "4                Retail     High Net Worth                  N      Yes   \n",
       "\n",
       "   tenure   Age  \n",
       "0    11.0  67.0  \n",
       "1    16.0  40.0  \n",
       "2     7.0  59.0  \n",
       "3     8.0  43.0  \n",
       "4    13.0  54.0  "
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cust_drop = cust_address.merge(cust_demo_detail , left_on = 'customer_id', right_on='customer_id'\n",
    "                     , how='outer')\n",
    "cust_drop.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}